Merge commit '1ad9253c9d34ccbce3e7e4ea5d87c266cbf93410'

deplib features commented out due to removal upstream; will add back as a localmod Conflicts: include/llvm/ADT/Triple.h include/llvm/MC/MCAssembler.h include/llvm/Target/TargetFrameLowering.h lib/CodeGen/AsmPrinter/DwarfDebug.cpp lib/CodeGen/AsmPrinter/DwarfDebug.h lib/CodeGen/BranchFolding.cpp lib/LLVMBuild.txt lib/Linker/LinkArchives.cpp lib/MC/MCAssembler.cpp lib/MC/MCELFStreamer.cpp lib/Makefile lib/Target/ARM/ARMExpandPseudoInsts.cpp lib/Target/ARM/ARMFrameLowering.cpp lib/Target/ARM/ARMISelLowering.cpp lib/Target/ARM/ARMSubtarget.h lib/Target/ARM/ARMTargetObjectFile.cpp lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp lib/Target/Mips/MipsInstrFPU.td lib/Target/Mips/MipsInstrInfo.td lib/Target/X86/X86CodeEmitter.cpp lib/Target/X86/X86Subtarget.h lib/VMCore/Module.cpp test/MC/MachO/ARM/nop-armv4-padding.s tools/Makefile tools/llc/llc.cpp tools/lto/LTOModule.cpp tools/lto/lto.cpp
author: Derek Schuff <dschuff@chromium.org> 2013-01-09 16:55:43 -0800
committer: Derek Schuff <dschuff@chromium.org> 2013-01-11 13:47:37 -0800
commit: b770d0e0636a4b5ad61b1ca661caee67576c05fc (patch)
tree: c486ce032d41f97313c50629bd5b879f53e6ccbf
parent: b835840cf112a6178506d834b58aa625f59a8994 (diff)
parent: 1ad9253c9d34ccbce3e7e4ea5d87c266cbf93410 (diff)
1424 files changed, 59323 insertions, 51824 deletions
diff --git a/.arcconfig b/.arcconfig
new file mode 100644
index 0000000000..4711195a1d
--- /dev/null
+++ b/.arcconfig
@@ -0,0 +1,4 @@
+{
+  "project_id" : "llvm",
+  "conduit_uri" : "http://llvm-reviews.chandlerc.com/"
+}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 228732d6a7..c2ccdecd13 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -242,8 +242,7 @@ include(config-ix)
 # invocation time.
 set(LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_HOST_TRIPLE}" CACHE STRING
   "Default target for which LLVM will generate code." )
-set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}" CACHE STRING
-  "Default target for which LLVM will generate code." )
+set(TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}")
 
 include(HandleLLVMOptions)
 
diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT
index d056a7f5df..7bd5d54e18 100644
--- a/CODE_OWNERS.TXT
+++ b/CODE_OWNERS.TXT
@@ -51,9 +51,6 @@ N: Venkatraman Govindaraju
 E: venkatra@cs.wisc.edu
 D: Sparc Backend (lib/Target/Sparc/*)
 
-N: Doug Gregor
-D: All parts of Clang not covered by someone else
-
 N: Tobias Grosser
 D: Polly
 
@@ -84,9 +81,6 @@ N: Benjamin Kramer
 E: benny.kra@gmail.com
 D: DWARF Parser
 
-N: Ted Kremenek
-D: Clang Static Analyzer
-
 N: Sergei Larin
 E: slarin@codeaurora.org
 D: VLIW Instruction Scheduling, Packetization
@@ -96,10 +90,6 @@ E: sabre@nondot.org
 W: http://nondot.org/~sabre/
 D: Everything not covered by someone else
 
-N: John McCall
-E: rjmccall@apple.com
-D: Clang LLVM IR generation
-
 N: Jakob Olesen
 D: Register allocators and TableGen
 
@@ -109,7 +99,7 @@ D: XCore Backend
 
 N: Chad Rosier
 E: mcrosier@apple.com
-D: MS-inline asm, Fast-Isel, and the compiler driver
+D: Fast-Isel
 
 N: Nadav Rotem
 E: nrotem@apple.com
@@ -119,10 +109,10 @@ N: Duncan Sands
 E: baldrick@free.fr
 D: DragonEgg
 
-N: Richard Smith
-E: richard@metafoo.co.uk
-D: Clang Semantic Analysis (tools/clang/lib/Sema/* tools/clang/include/clang/Sema/*)
-
 N: Andrew Trick
 E: atrick@apple.com
-D: Instruction Scheduling
+D: IndVar Simplify, Loop Strength Reduction, Instruction Scheduling
+
+N: Bill Wendling
+E: wendling@apple.com
+D: libLTO & IR Linker
diff --git a/CREDITS.TXT b/CREDITS.TXT
index ca94065b3c..bb5e254d5c 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -143,7 +143,7 @@ E: foldr@codedgers.com
 D: Author of llvmc2
 
 N: Dan Gohman
-E: gohman@apple.com
+E: dan433584@gmail.com
 D: Miscellaneous bug fixes
 
 N: David Goodwin
diff --git a/Makefile.common b/Makefile.common
index 55e2b63434..a157abaef2 100644
--- a/Makefile.common
+++ b/Makefile.common
@@ -28,7 +28,7 @@
 #    built in any order.  All DIRS are built in order before PARALLEL_DIRS are
 #    built, which are then built in any order.
 #
-# 4. Source - If specified, this sets the source code filenames.  If this
+# 4. SOURCES - If specified, this sets the source code filenames.  If this
 #    is not set, it defaults to be all of the .cpp, .c, .y, and .l files
 #    in the current directory.
 #
diff --git a/README.txt b/README.txt
index 0d39ed6934..2ebe271b8e 100644
--- a/README.txt
+++ b/README.txt
@@ -13,5 +13,3 @@ assistance with LLVM.
 
 If you're writing a package for LLVM, see docs/Packaging.html for our
 suggestions.
-
-
diff --git a/bindings/python/llvm/disassembler.py b/bindings/python/llvm/disassembler.py
index 5030b989a9..dcef9ac269 100644
--- a/bindings/python/llvm/disassembler.py
+++ b/bindings/python/llvm/disassembler.py
@@ -31,6 +31,9 @@ __all__ = [
 lib = get_library()
 callbacks = {}
 
+# Constants for set_options
+Option_UseMarkup = 1
+
 class Disassembler(LLVMObject):
     """Represents a disassembler instance.
 
@@ -113,6 +116,10 @@ class Disassembler(LLVMObject):
             address += result
             offset += result
 
+    def set_options(self, options):
+        if not lib.LLVMSetDisasmOptions(self, options):
+            raise Exception('Unable to set all disassembler options in %i' % options)
+
 
 def register_library(library):
     library.LLVMCreateDisasm.argtypes = [c_char_p, c_void_p, c_int,
@@ -125,6 +132,10 @@ def register_library(library):
             c_uint64, c_uint64, c_char_p, c_size_t]
     library.LLVMDisasmInstruction.restype = c_size_t
 
+    library.LLVMSetDisasmOptions.argtypes = [Disassembler, c_uint64]
+    library.LLVMSetDisasmOptions.restype = c_int
+
+
 callbacks['op_info'] = CFUNCTYPE(c_int, c_void_p, c_uint64, c_uint64, c_uint64,
                                  c_int, c_void_p)
 callbacks['symbol_lookup'] = CFUNCTYPE(c_char_p, c_void_p, c_uint64,
diff --git a/bindings/python/llvm/tests/test_disassembler.py b/bindings/python/llvm/tests/test_disassembler.py
index 545e8668b6..46d12f7056 100644
--- a/bindings/python/llvm/tests/test_disassembler.py
+++ b/bindings/python/llvm/tests/test_disassembler.py
@@ -1,6 +1,6 @@
 from .base import TestBase
 
-from ..disassembler import Disassembler
+from ..disassembler import Disassembler, Option_UseMarkup
 
 class TestDisassembler(TestBase):
     def test_instantiate(self):
@@ -26,3 +26,14 @@ class TestDisassembler(TestBase):
 
         self.assertEqual(instructions[0], (0, 3, '\tjcxz\t-127'))
         self.assertEqual(instructions[1], (3, 2, '\taddl\t%eax, %edi'))
+
+    def test_set_options(self):
+        sequence = '\x10\x40\x2d\xe9'
+        triple = 'arm-linux-android'
+
+        disassembler = Disassembler(triple)
+        disassembler.set_options(Option_UseMarkup)
+        count, s = disassembler.get_instruction(sequence)
+        print s
+        self.assertEqual(count, 4)
+        self.assertEqual(s, '\tpush\t{<reg:r4>, <reg:lr>}')
diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake
index d6a2ae5f45..26314d4126 100644
--- a/cmake/modules/VersionFromVCS.cmake
+++ b/cmake/modules/VersionFromVCS.cmake
@@ -20,49 +20,51 @@ function(add_version_info_from_vcs VERS)
   elseif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git )
     set(result "${result}git")
     # Try to get a ref-id
-    find_program(git_executable NAMES git git.exe git.cmd)
-    if( git_executable )
-      set(is_git_svn_rev_exact false)
-      execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline
-                      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-                      TIMEOUT 5
-                      RESULT_VARIABLE git_result
-                      OUTPUT_VARIABLE git_output)
-      if( git_result EQUAL 0 )
-        string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output})
-        string(LENGTH "${git_svn_rev}" rev_length)
-        math(EXPR rev_length "${rev_length}-1")
-        string(SUBSTRING "${git_svn_rev}" 1 ${rev_length} git_svn_rev_number)
-        set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE)
-        set(git_svn_rev "-svn-${git_svn_rev}")
-
-        # Determine if the HEAD points directly at a subversion revision.
-        execute_process(COMMAND ${git_executable} svn find-rev HEAD
-                        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-                        TIMEOUT 5
-                        RESULT_VARIABLE git_result
-                        OUTPUT_VARIABLE git_output)
+    if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git/svn )
+      find_program(git_executable NAMES git git.exe git.cmd)
+      if( git_executable )
+        set(is_git_svn_rev_exact false)
+        execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline
+          WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+          TIMEOUT 5
+          RESULT_VARIABLE git_result
+          OUTPUT_VARIABLE git_output)
         if( git_result EQUAL 0 )
-          string(STRIP "${git_output}" git_head_svn_rev_number)
-          if( git_head_svn_rev_number EQUAL git_svn_rev_number )
-            set(is_git_svn_rev_exact true)
+          string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output})
+          string(LENGTH "${git_svn_rev}" rev_length)
+          math(EXPR rev_length "${rev_length}-1")
+          string(SUBSTRING "${git_svn_rev}" 1 ${rev_length} git_svn_rev_number)
+          set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE)
+          set(git_svn_rev "-svn-${git_svn_rev}")
+
+          # Determine if the HEAD points directly at a subversion revision.
+          execute_process(COMMAND ${git_executable} svn find-rev HEAD
+            WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+            TIMEOUT 5
+            RESULT_VARIABLE git_result
+            OUTPUT_VARIABLE git_output)
+          if( git_result EQUAL 0 )
+            string(STRIP "${git_output}" git_head_svn_rev_number)
+            if( git_head_svn_rev_number EQUAL git_svn_rev_number )
+              set(is_git_svn_rev_exact true)
+            endif()
           endif()
+        else()
+          set(git_svn_rev "")
+        endif()
+        execute_process(COMMAND
+          ${git_executable} rev-parse --short HEAD
+          WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+          TIMEOUT 5
+          RESULT_VARIABLE git_result
+          OUTPUT_VARIABLE git_output)
+        if( git_result EQUAL 0 AND NOT is_git_svn_rev_exact )
+          string(STRIP "${git_output}" git_ref_id)
+          set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE)
+          set(result "${result}${git_svn_rev}-${git_ref_id}")
+        else()
+          set(result "${result}${git_svn_rev}")
         endif()
-      else()
-        set(git_svn_rev "")
-      endif()
-      execute_process(COMMAND
-                      ${git_executable} rev-parse --short HEAD
-                      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-                      TIMEOUT 5
-                      RESULT_VARIABLE git_result
-                      OUTPUT_VARIABLE git_output)
-      if( git_result EQUAL 0 AND NOT is_git_svn_rev_exact )
-        string(STRIP "${git_output}" git_ref_id)
-        set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE)
-        set(result "${result}${git_svn_rev}-${git_ref_id}")
-      else()
-        set(result "${result}${git_svn_rev}")
       endif()
     endif()
   endif()
diff --git a/docs/BranchWeightMetadata.rst b/docs/BranchWeightMetadata.rst
index f0df971f87..2667ce3589 100644
--- a/docs/BranchWeightMetadata.rst
+++ b/docs/BranchWeightMetadata.rst
@@ -27,8 +27,8 @@ Supported Instructions
 ``BranchInst``
 ^^^^^^^^^^^^^^
 
-Metadata is only assign to the conditional branches. There are two extra
-operarands, for the true and the false branch.
+Metadata is only assigned to the conditional branches. There are two extra
+operarands for the true and the false branch.
 
 .. code-block:: llvm
 
@@ -41,8 +41,8 @@ operarands, for the true and the false branch.
 ``SwitchInst``
 ^^^^^^^^^^^^^^
 
-Branch weights are assign to every case (including ``default`` case which is
-always case #0).
+Branch weights are assigned to every case (including the ``default`` case which
+is always case #0).
 
 .. code-block:: llvm
 
@@ -55,7 +55,7 @@ always case #0).
 ``IndirectBrInst``
 ^^^^^^^^^^^^^^^^^^
 
-Branch weights are assign to every destination.
+Branch weights are assigned to every destination.
 
 .. code-block:: llvm
 
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index 11174b7bee..ce23667eb3 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -172,7 +172,7 @@ architecture.  These target descriptions often have a large amount of common
 information (e.g., an ``add`` instruction is almost identical to a ``sub``
 instruction).  In order to allow the maximum amount of commonality to be
 factored out, the LLVM code generator uses the
-`TableGen <TableGenFundamentals.html>`_ tool to describe big chunks of the
+:doc:`TableGen <TableGenFundamentals>` tool to describe big chunks of the
 target machine, which allows the use of domain-specific and target-specific
 abstractions to reduce the amount of repetition.
 
@@ -230,7 +230,7 @@ for structures, the alignment requirements for various data types, the size of
 pointers in the target, and whether the target is little-endian or
 big-endian.
 
-.. _targetlowering:
+.. _TargetLowering:
 
 The ``TargetLowering`` class
 ----------------------------
@@ -250,6 +250,8 @@ operations.  Among other things, this class indicates:
 * various high-level characteristics, like whether it is profitable to turn
   division by a constant into a multiplication sequence.
 
+.. _TargetRegisterInfo:
+
 The ``TargetRegisterInfo`` class
 --------------------------------
 
@@ -771,6 +773,8 @@ value of type i1, i8, i16, or i64 would be illegal, as would a DAG that uses a
 SREM or UREM operation.  The `legalize types`_ and `legalize operations`_ phases
 are responsible for turning an illegal DAG into a legal DAG.
 
+.. _SelectionDAG-Process:
+
 SelectionDAG Instruction Selection Process
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -874,7 +878,7 @@ found, the elements are converted to scalars ("scalarizing").
 
 A target implementation tells the legalizer which types are supported (and which
 register class to use for them) by calling the ``addRegisterClass`` method in
-its TargetLowering constructor.
+its ``TargetLowering`` constructor.
 
 .. _legalize operations:
 .. _Legalizer:
@@ -1728,6 +1732,8 @@ This section of the document explains features or design decisions that are
 specific to the code generator for a particular target.  First we start with a
 table that summarizes what features are supported by each target.
 
+.. _target-feature-matrix:
+
 Target Feature Matrix
 ---------------------
 
diff --git a/docs/CodingStandards.rst b/docs/CodingStandards.rst
index 2b6a6acb1f..8003c12497 100644
--- a/docs/CodingStandards.rst
+++ b/docs/CodingStandards.rst
@@ -284,17 +284,10 @@ listed.  We prefer these ``#include``\s to be listed in this order:
 
 #. Main Module Header
 #. Local/Private Headers
-#. ``llvm/*``
-#. ``llvm/Analysis/*``
-#. ``llvm/Assembly/*``
-#. ``llvm/Bitcode/*``
-#. ``llvm/CodeGen/*``
-#. ...
-#. ``llvm/Support/*``
-#. ``llvm/Config/*``
+#. ``llvm/...``
 #. System ``#include``\s
 
-and each category should be sorted by name.
+and each category should be sorted lexicographically by the full path.
 
 The `Main Module Header`_ file applies to ``.cpp`` files which implement an
 interface defined by a ``.h`` file.  This ``#include`` should always be included
diff --git a/docs/CommandGuide/FileCheck.rst b/docs/CommandGuide/FileCheck.rst
index 5e145f620f..256970b362 100644
--- a/docs/CommandGuide/FileCheck.rst
+++ b/docs/CommandGuide/FileCheck.rst
@@ -4,57 +4,57 @@ FileCheck - Flexible pattern matching file verifier
 SYNOPSIS
 --------
 
-**FileCheck** *match-filename* [*--check-prefix=XXX*] [*--strict-whitespace*]
+:program:`FileCheck` *match-filename* [*--check-prefix=XXX*] [*--strict-whitespace*]
 
 DESCRIPTION
 -----------
 
-**FileCheck** reads two files (one from standard input, and one specified on the
-command line) and uses one to verify the other.  This behavior is particularly
-useful for the testsuite, which wants to verify that the output of some tool
-(e.g. llc) contains the expected information (for example, a movsd from esp or
-whatever is interesting).  This is similar to using grep, but it is optimized
-for matching multiple different inputs in one file in a specific order.
+:program:`FileCheck` reads two files (one from standard input, and one
+specified on the command line) and uses one to verify the other.  This
+behavior is particularly useful for the testsuite, which wants to verify that
+the output of some tool (e.g. :program:`llc`) contains the expected information
+(for example, a movsd from esp or whatever is interesting).  This is similar to
+using :program:`grep`, but it is optimized for matching multiple different
+inputs in one file in a specific order.
 
-The *match-filename* file specifies the file that contains the patterns to
+The ``match-filename`` file specifies the file that contains the patterns to
 match.  The file to verify is always read from standard input.
 
 OPTIONS
 -------
 
-**-help**
+.. option:: -help
 
  Print a summary of command line options.
 
-**--check-prefix** *prefix*
+.. option:: --check-prefix prefix
 
- FileCheck searches the contents of *match-filename* for patterns to match.  By
- default, these patterns are prefixed with "``CHECK:``".  If you'd like to use a
- different prefix (e.g. because the same input file is checking multiple
- different tool or options), the **--check-prefix** argument allows you to specify
- a specific prefix to match.
+ FileCheck searches the contents of ``match-filename`` for patterns to match.
+ By default, these patterns are prefixed with "``CHECK:``".  If you'd like to
+ use a different prefix (e.g. because the same input file is checking multiple
+ different tool or options), the :option:`--check-prefix` argument allows you
+ to specify a specific prefix to match.
 
-**--input-file** *filename*
+.. option:: --input-file filename
 
   File to check (defaults to stdin).
 
-**--strict-whitespace**
+.. option:: --strict-whitespace
 
  By default, FileCheck canonicalizes input horizontal whitespace (spaces and
  tabs) which causes it to ignore these differences (a space will match a tab).
- The **--strict-whitespace** argument disables this behavior.
+ The :option:`--strict-whitespace` argument disables this behavior.
 
-
-**-version**
+.. option:: -version
 
  Show the version number of this program.
 
 EXIT STATUS
 -----------
 
-If **FileCheck** verifies that the file matches the expected contents, it exits
-with 0.  Otherwise, if not, or if an error occurs, it will exit with a non-zero
-value.
+If :program:`FileCheck` verifies that the file matches the expected contents,
+it exits with 0.  Otherwise, if not, or if an error occurs, it will exit with a
+non-zero value.
 
 TUTORIAL
 --------
@@ -67,7 +67,6 @@ like this:
 
    ; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s
 
-
 This syntax says to pipe the current file ("``%s``") into ``llvm-as``, pipe
 that into ``llc``, then pipe the output of ``llc`` into ``FileCheck``.  This
 means that FileCheck will be verifying its standard input (the llc output)
@@ -93,7 +92,6 @@ against the filename argument specified (the original ``.ll`` file specified by
            ret void
    }
 
-
 Here you can see some "``CHECK:``" lines specified in comments.  Now you can
 see how the file is piped into ``llvm-as``, then ``llc``, and the machine code
 output is what we are verifying.  FileCheck checks the machine code output to
@@ -114,9 +112,10 @@ exists anywhere in the file.
 The FileCheck -check-prefix option
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The FileCheck ``-check-prefix`` option allows multiple test configurations to be
-driven from one .ll file.  This is useful in many circumstances, for example,
-testing different architectural variants with llc.  Here's a simple example:
+The FileCheck :option:`-check-prefix` option allows multiple test
+configurations to be driven from one `.ll` file.  This is useful in many
+circumstances, for example, testing different architectural variants with
+:program:`llc`.  Here's a simple example:
 
 .. code-block:: llvm
 
@@ -194,7 +193,6 @@ can be used:
    ; CHECK: ret i8
    }
 
-
 FileCheck Pattern Matching Syntax
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -225,9 +223,9 @@ FileCheck Variables
 
 It is often useful to match a pattern and then verify that it occurs again
 later in the file.  For codegen tests, this can be useful to allow any register,
-but verify that that register is used consistently later.  To do this, FileCheck
-allows named variables to be defined and substituted into patterns.  Here is a
-simple example:
+but verify that that register is used consistently later.  To do this,
+:program:`FileCheck` allows named variables to be defined and substituted into
+patterns.  Here is a simple example:
 
 .. code-block:: llvm
 
@@ -237,31 +235,30 @@ simple example:
 
 The first check line matches a regex ``%[a-z]+`` and captures it into the
 variable ``REGISTER``.  The second line verifies that whatever is in
-``REGISTER`` occurs later in the file after an "``andw``".  FileCheck variable
-references are always contained in ``[[ ]]`` pairs, and their names can be
-formed with the regex ``[a-zA-Z][a-zA-Z0-9]*``.  If a colon follows the name,
+``REGISTER`` occurs later in the file after an "``andw``".  :program:`FileCheck`
+variable references are always contained in ``[[ ]]`` pairs, and their names can
+be formed with the regex ``[a-zA-Z][a-zA-Z0-9]*``.  If a colon follows the name,
 then it is a definition of the variable; otherwise, it is a use.
 
-FileCheck variables can be defined multiple times, and uses always get the
-latest value.  Note that variables are all read at the start of a "``CHECK``"
-line and are all defined at the end.  This means that if you have something
-like "``CHECK: [[XYZ:.*]]x[[XYZ]]``", the check line will read the previous
-value of the ``XYZ`` variable and define a new one after the match is
-performed.  If you need to do something like this you can probably take
-advantage of the fact that FileCheck is not actually line-oriented when it
-matches, this allows you to define two separate "``CHECK``" lines that match on
-the same line.
+:program:`FileCheck` variables can be defined multiple times, and uses always
+get the latest value.  Variables can also be used later on the same line they
+were defined on. For example:
 
+.. code-block:: llvm
+
+    ; CHECK: op [[REG:r[0-9]+]], [[REG]]
+
+Can be useful if you want the operands of ``op`` to be the same register,
+and don't care exactly which register it is.
 
 FileCheck Expressions
 ~~~~~~~~~~~~~~~~~~~~~
 
-
-Sometimes there's a need to verify output which refers line numbers of the match
-file, e.g. when testing compiler diagnostics. This introduces a certain
-fragility of the match file structure, as CHECK: lines contain absolute line
-numbers in the same file, which have to be updated whenever line numbers change
-due to text addition or deletion.
+Sometimes there's a need to verify output which refers line numbers of the
+match file, e.g. when testing compiler diagnostics.  This introduces a certain
+fragility of the match file structure, as "``CHECK:``" lines contain absolute
+line numbers in the same file, which have to be updated whenever line numbers
+change due to text addition or deletion.
 
 To support this case, FileCheck allows using ``[[@LINE]]``,
 ``[[@LINE+<offset>]]``, ``[[@LINE-<offset>]]`` expressions in patterns. These
diff --git a/docs/CommandGuide/bugpoint.rst b/docs/CommandGuide/bugpoint.rst
index c1b3b6eca6..e4663e5d44 100644
--- a/docs/CommandGuide/bugpoint.rst
+++ b/docs/CommandGuide/bugpoint.rst
@@ -1,19 +1,15 @@
 bugpoint - automatic test case reduction tool
 =============================================
 
-
 SYNOPSIS
 --------
 
-
 **bugpoint** [*options*] [*input LLVM ll/bc files*] [*LLVM passes*] **--args**
 *program arguments*
 
-
 DESCRIPTION
 -----------
 
-
 **bugpoint** narrows down the source of problems in LLVM tools and passes.  It
 can be used to debug three types of failures: optimizer crashes, miscompilations
 by optimizers, or bad native code generation (including problems in the static
@@ -22,82 +18,61 @@ For more information on the design and inner workings of **bugpoint**, as well a
 advice for using bugpoint, see *llvm/docs/Bugpoint.html* in the LLVM
 distribution.
 
-
 OPTIONS
 -------
 
-
-
 **--additional-so** *library*
 
  Load the dynamic shared object *library* into the test program whenever it is
  run.  This is useful if you are debugging programs which depend on non-LLVM
  libraries (such as the X or curses libraries) to run.
 
-
-
 **--append-exit-code**\ =\ *{true,false}*
 
  Append the test programs exit code to the output file so that a change in exit
  code is considered a test failure. Defaults to false.
 
-
-
 **--args** *program args*
 
- Pass all arguments specified after -args to the test program whenever it runs.
- Note that if any of the *program args* start with a '-', you should use:
-
+ Pass all arguments specified after **--args** to the test program whenever it runs.
+ Note that if any of the *program args* start with a "``-``", you should use:
 
- .. code-block:: perl
+ .. code-block:: bash
 
       bugpoint [bugpoint args] --args -- [program args]
 
-
- The "--" right after the **--args** option tells **bugpoint** to consider any
- options starting with ``-`` to be part of the **--args** option, not as options to
- **bugpoint** itself.
-
-
+ The "``--``" right after the **--args** option tells **bugpoint** to consider
+ any options starting with "``-``" to be part of the **--args** option, not as
+ options to **bugpoint** itself.
 
 **--tool-args** *tool args*
 
- Pass all arguments specified after --tool-args to the LLVM tool under test
+ Pass all arguments specified after **--tool-args** to the LLVM tool under test
  (**llc**, **lli**, etc.) whenever it runs.  You should use this option in the
  following way:
 
-
- .. code-block:: perl
+ .. code-block:: bash
 
       bugpoint [bugpoint args] --tool-args -- [tool args]
 
-
- The "--" right after the **--tool-args** option tells **bugpoint** to consider any
- options starting with ``-`` to be part of the **--tool-args** option, not as
- options to **bugpoint** itself. (See **--args**, above.)
-
-
+ The "``--``" right after the **--tool-args** option tells **bugpoint** to
+ consider any options starting with "``-``" to be part of the **--tool-args**
+ option, not as options to **bugpoint** itself. (See **--args**, above.)
 
 **--safe-tool-args** *tool args*
 
  Pass all arguments specified after **--safe-tool-args** to the "safe" execution
  tool.
 
-
-
 **--gcc-tool-args** *gcc tool args*
 
  Pass all arguments specified after **--gcc-tool-args** to the invocation of
  **gcc**.
 
-
-
 **--opt-args** *opt args*
 
  Pass all arguments specified after **--opt-args** to the invocation of **opt**.
 
-
-
 **--disable-{dce,simplifycfg}**
 
  Do not run the specified passes to clean up and reduce the size of the test
@@ -105,36 +80,26 @@ OPTIONS
  reduce test programs.  If you're trying to find a bug in one of these passes,
  **bugpoint** may crash.
 
-
-
 **--enable-valgrind**
 
  Use valgrind to find faults in the optimization phase. This will allow
  bugpoint to find otherwise asymptomatic problems caused by memory
  mis-management.
 
-
-
 **-find-bugs**
 
  Continually randomize the specified passes and run them on the test program
  until a bug is found or the user kills **bugpoint**.
 
-
-
 **-help**
 
  Print a summary of command line options.
 
-
-
 **--input** *filename*
 
  Open *filename* and redirect the standard input of the test program, whenever
  it runs, to come from that file.
 
-
-
 **--load** *plugin*
 
  Load the dynamic object *plugin* into **bugpoint** itself.  This object should
@@ -143,20 +108,15 @@ OPTIONS
  optimizations, use the **-help** and **--load** options together; for example:
 
 
- .. code-block:: perl
+ .. code-block:: bash
 
       bugpoint --load myNewPass.so -help
 
-
-
-
 **--mlimit** *megabytes*
 
  Specifies an upper limit on memory usage of the optimization and codegen. Set
  to zero to disable the limit.
 
-
-
 **--output** *filename*
 
  Whenever the test program produces output on its standard output stream, it
@@ -164,14 +124,10 @@ OPTIONS
  do not use this option, **bugpoint** will attempt to generate a reference output
  by compiling the program with the "safe" backend and running it.
 
-
-
 **--profile-info-file** *filename*
 
  Profile file loaded by **--profile-loader**.
 
-
-
 **--run-{int,jit,llc,custom}**
 
  Whenever the test program is compiled, **bugpoint** should generate code for it
@@ -179,8 +135,6 @@ OPTIONS
  interpreter, the JIT compiler, the static native code compiler, or a
  custom command (see **--exec-command**) respectively.
 
-
-
 **--safe-{llc,custom}**
 
  When debugging a code generator, **bugpoint** should use the specified code
@@ -192,16 +146,12 @@ OPTIONS
  respectively. The interpreter and the JIT backends cannot currently
  be used as the "safe" backends.
 
-
-
 **--exec-command** *command*
 
  This option defines the command to use with the **--run-custom** and
  **--safe-custom** options to execute the bitcode testcase. This can
  be useful for cross-compilation.
 
-
-
 **--compile-command** *command*
 
  This option defines the command to use with the **--compile-custom**
@@ -210,38 +160,28 @@ OPTIONS
  generate a reduced unit test, you may add CHECK directives to the
  testcase and pass the name of an executable compile-command script in this form:
 
-
  .. code-block:: sh
 
       #!/bin/sh
       llc "$@"
       not FileCheck [bugpoint input file].ll < bugpoint-test-program.s
 
-
  This script will "fail" as long as FileCheck passes. So the result
  will be the minimum bitcode that passes FileCheck.
 
-
-
 **--safe-path** *path*
 
  This option defines the path to the command to execute with the
  **--safe-{int,jit,llc,custom}**
  option.
 
-
-
-
 EXIT STATUS
 -----------
 
-
 If **bugpoint** succeeds in finding a problem, it will exit with 0.  Otherwise,
 if an error occurs, it will exit with a non-zero value.
 
-
 SEE ALSO
 --------
 
-
 opt|opt
diff --git a/docs/CommandGuide/lit.rst b/docs/CommandGuide/lit.rst
index 8886fe6a45..1dcaff10bf 100644
--- a/docs/CommandGuide/lit.rst
+++ b/docs/CommandGuide/lit.rst
@@ -1,351 +1,282 @@
 lit - LLVM Integrated Tester
 ============================
 
-
 SYNOPSIS
 --------
 
-
-**lit** [*options*] [*tests*]
-
+:program:`lit` [*options*] [*tests*]
 
 DESCRIPTION
 -----------
 
+:program:`lit` is a portable tool for executing LLVM and Clang style test
+suites, summarizing their results, and providing indication of failures.
+:program:`lit` is designed to be a lightweight testing tool with as simple a
+user interface as possible.
 
-**lit** is a portable tool for executing LLVM and Clang style test suites,
-summarizing their results, and providing indication of failures. **lit** is
-designed to be a lightweight testing tool with as simple a user interface as
-possible.
-
-**lit** should be run with one or more *tests* to run specified on the command
-line. Tests can be either individual test files or directories to search for
-tests (see "TEST DISCOVERY").
+:program:`lit` should be run with one or more *tests* to run specified on the
+command line.  Tests can be either individual test files or directories to
+search for tests (see :ref:`test-discovery`).
 
 Each specified test will be executed (potentially in parallel) and once all
-tests have been run **lit** will print summary information on the number of tests
-which passed or failed (see "TEST STATUS RESULTS"). The **lit** program will
-execute with a non-zero exit code if any tests fail.
-
-By default **lit** will use a succinct progress display and will only print
-summary information for test failures. See "OUTPUT OPTIONS" for options
-controlling the **lit** progress display and output.
+tests have been run :program:`lit` will print summary information on the number
+of tests which passed or failed (see :ref:`test-status-results`).  The
+:program:`lit` program will execute with a non-zero exit code if any tests
+fail.
 
-**lit** also includes a number of options for controlling how tests are executed
-(specific features may depend on the particular test format). See "EXECUTION
-OPTIONS" for more information.
+By default :program:`lit` will use a succinct progress display and will only
+print summary information for test failures.  See :ref:`output-options` for
+options controlling the :program:`lit` progress display and output.
 
-Finally, **lit** also supports additional options for only running a subset of
-the options specified on the command line, see "SELECTION OPTIONS" for
-more information.
+:program:`lit` also includes a number of options for controlling how tests are
+executed (specific features may depend on the particular test format).  See
+:ref:`execution-options` for more information.
 
-Users interested in the **lit** architecture or designing a **lit** testing
-implementation should see "LIT INFRASTRUCTURE"
+Finally, :program:`lit` also supports additional options for only running a
+subset of the options specified on the command line, see
+:ref:`selection-options` for more information.
 
+Users interested in the :program:`lit` architecture or designing a
+:program:`lit` testing implementation should see :ref:`lit-infrastructure`.
 
 GENERAL OPTIONS
 ---------------
 
+.. option:: -h, --help
 
+ Show the :program:`lit` help message.
 
-**-h**, **--help**
-
- Show the **lit** help message.
-
-
-
-**-j** *N*, **--threads**\ =\ *N*
-
- Run *N* tests in parallel. By default, this is automatically chosen to match
- the number of detected available CPUs.
-
-
+.. option:: -j N, --threads=N
 
-**--config-prefix**\ =\ *NAME*
+ Run ``N`` tests in parallel.  By default, this is automatically chosen to
+ match the number of detected available CPUs.
 
- Search for *NAME.cfg* and *NAME.site.cfg* when searching for test suites,
- instead of *lit.cfg* and *lit.site.cfg*.
+.. option:: --config-prefix=NAME
 
+ Search for :file:`{NAME}.cfg` and :file:`{NAME}.site.cfg` when searching for
+ test suites, instead of :file:`lit.cfg` and :file:`lit.site.cfg`.
 
+.. option:: --param NAME, --param NAME=VALUE
 
-**--param** *NAME*, **--param** *NAME*\ =\ *VALUE*
-
- Add a user defined parameter *NAME* with the given *VALUE* (or the empty
- string if not given). The meaning and use of these parameters is test suite
+ Add a user defined parameter ``NAME`` with the given ``VALUE`` (or the empty
+ string if not given).  The meaning and use of these parameters is test suite
  dependent.
 
-
-
+.. _output-options:
 
 OUTPUT OPTIONS
 --------------
 
-
-
-**-q**, **--quiet**
+.. option:: -q, --quiet
 
  Suppress any output except for test failures.
 
-
-
-**-s**, **--succinct**
+.. option:: -s, --succinct
 
  Show less output, for example don't show information on tests that pass.
 
-
-
-**-v**, **--verbose**
+.. option:: -v, --verbose
 
  Show more information on test failures, for example the entire test output
  instead of just the test result.
 
-
-
-**--no-progress-bar**
+.. option:: --no-progress-bar
 
  Do not use curses based progress bar.
 
-
-
+.. _execution-options:
 
 EXECUTION OPTIONS
 -----------------
 
+.. option:: --path=PATH
 
+ Specify an additional ``PATH`` to use when searching for executables in tests.
 
-**--path**\ =\ *PATH*
-
- Specify an addition *PATH* to use when searching for executables in tests.
-
-
-
-**--vg**
-
- Run individual tests under valgrind (using the memcheck tool). The
- *--error-exitcode* argument for valgrind is used so that valgrind failures will
- cause the program to exit with a non-zero status.
-
- When this option is enabled, **lit** will also automatically provide a
- "valgrind" feature that can be used to conditionally disable (or expect failure
- in) certain tests.
-
-
-
-**--vg-arg**\ =\ *ARG*
-
- When *--vg* is used, specify an additional argument to pass to valgrind itself.
+.. option:: --vg
 
+ Run individual tests under valgrind (using the memcheck tool).  The
+ ``--error-exitcode`` argument for valgrind is used so that valgrind failures
+ will cause the program to exit with a non-zero status.
 
+ When this option is enabled, :program:`lit` will also automatically provide a
+ "``valgrind``" feature that can be used to conditionally disable (or expect
+ failure in) certain tests.
 
-**--vg-leak**
+.. option:: --vg-arg=ARG
 
- When *--vg* is used, enable memory leak checks. When this option is enabled,
- **lit** will also automatically provide a "vg_leak" feature that can be
- used to conditionally disable (or expect failure in) certain tests.
+ When :option:`--vg` is used, specify an additional argument to pass to
+ :program:`valgrind` itself.
 
+.. option:: --vg-leak
 
+ When :option:`--vg` is used, enable memory leak checks.  When this option is
+ enabled, :program:`lit` will also automatically provide a "``vg_leak``"
+ feature that can be used to conditionally disable (or expect failure in)
+ certain tests.
 
+.. option:: --time-tests
 
-**--time-tests**
-
- Track the wall time individual tests take to execute and includes the results in
- the summary output. This is useful for determining which tests in a test suite
- take the most time to execute. Note that this option is most useful with *-j
- 1*.
-
-
+ Track the wall time individual tests take to execute and includes the results
+ in the summary output.  This is useful for determining which tests in a test
+ suite take the most time to execute.  Note that this option is most useful
+ with ``-j 1``.
 
+.. _selection-options:
 
 SELECTION OPTIONS
 -----------------
 
+.. option:: --max-tests=N
 
+ Run at most ``N`` tests and then terminate.
 
-**--max-tests**\ =\ *N*
-
- Run at most *N* tests and then terminate.
-
-
+.. option:: --max-time=N
 
-**--max-time**\ =\ *N*
+ Spend at most ``N`` seconds (approximately) running tests and then terminate.
 
- Spend at most *N* seconds (approximately) running tests and then terminate.
-
-
-
-**--shuffle**
+.. option:: --shuffle
 
  Run the tests in a random order.
 
-
-
-
 ADDITIONAL OPTIONS
 ------------------
 
+.. option:: --debug
 
+ Run :program:`lit` in debug mode, for debugging configuration issues and
+ :program:`lit` itself.
 
-**--debug**
-
- Run **lit** in debug mode, for debugging configuration issues and **lit** itself.
-
-
-
-**--show-suites**
+.. option:: --show-suites
 
  List the discovered test suites as part of the standard output.
 
-
-
-**--no-tcl-as-sh**
+.. option:: --no-tcl-as-sh
 
  Run Tcl scripts internally (instead of converting to shell scripts).
 
+.. option:: --repeat=N
 
-
-**--repeat**\ =\ *N*
-
- Run each test *N* times. Currently this is primarily useful for timing tests,
- other results are not collated in any reasonable fashion.
-
-
-
+ Run each test ``N`` times.  Currently this is primarily useful for timing
+ tests, other results are not collated in any reasonable fashion.
 
 EXIT STATUS
 -----------
 
-
-**lit** will exit with an exit code of 1 if there are any FAIL or XPASS
-results. Otherwise, it will exit with the status 0. Other exit codes are used
+:program:`lit` will exit with an exit code of 1 if there are any FAIL or XPASS
+results.  Otherwise, it will exit with the status 0.  Other exit codes are used
 for non-test related failures (for example a user error or an internal program
 error).
 
+.. _test-discovery:
 
 TEST DISCOVERY
 --------------
 
+The inputs passed to :program:`lit` can be either individual tests, or entire
+directories or hierarchies of tests to run.  When :program:`lit` starts up, the
+first thing it does is convert the inputs into a complete list of tests to run
+as part of *test discovery*.
 
-The inputs passed to **lit** can be either individual tests, or entire
-directories or hierarchies of tests to run. When **lit** starts up, the first
-thing it does is convert the inputs into a complete list of tests to run as part
-of *test discovery*.
-
-In the **lit** model, every test must exist inside some *test suite*. **lit**
-resolves the inputs specified on the command line to test suites by searching
-upwards from the input path until it finds a *lit.cfg* or *lit.site.cfg*
-file. These files serve as both a marker of test suites and as configuration
-files which **lit** loads in order to understand how to find and run the tests
-inside the test suite.
+In the :program:`lit` model, every test must exist inside some *test suite*.
+:program:`lit` resolves the inputs specified on the command line to test suites
+by searching upwards from the input path until it finds a :file:`lit.cfg` or
+:file:`lit.site.cfg` file.  These files serve as both a marker of test suites
+and as configuration files which :program:`lit` loads in order to understand
+how to find and run the tests inside the test suite.
 
-Once **lit** has mapped the inputs into test suites it traverses the list of
-inputs adding tests for individual files and recursively searching for tests in
-directories.
+Once :program:`lit` has mapped the inputs into test suites it traverses the
+list of inputs adding tests for individual files and recursively searching for
+tests in directories.
 
 This behavior makes it easy to specify a subset of tests to run, while still
 allowing the test suite configuration to control exactly how tests are
-interpreted. In addition, **lit** always identifies tests by the test suite they
-are in, and their relative path inside the test suite. For appropriately
-configured projects, this allows **lit** to provide convenient and flexible
-support for out-of-tree builds.
+interpreted.  In addition, :program:`lit` always identifies tests by the test
+suite they are in, and their relative path inside the test suite.  For
+appropriately configured projects, this allows :program:`lit` to provide
+convenient and flexible support for out-of-tree builds.
 
+.. _test-status-results:
 
 TEST STATUS RESULTS
 -------------------
 
-
 Each test ultimately produces one of the following six results:
 
-
 **PASS**
 
  The test succeeded.
 
-
-
 **XFAIL**
 
- The test failed, but that is expected. This is used for test formats which allow
+ The test failed, but that is expected.  This is used for test formats which allow
  specifying that a test does not currently work, but wish to leave it in the test
  suite.
 
-
-
 **XPASS**
 
- The test succeeded, but it was expected to fail. This is used for tests which
+ The test succeeded, but it was expected to fail.  This is used for tests which
  were specified as expected to fail, but are now succeeding (generally because
  the feature they test was broken and has been fixed).
 
-
-
 **FAIL**
 
  The test failed.
 
-
-
 **UNRESOLVED**
 
- The test result could not be determined. For example, this occurs when the test
+ The test result could not be determined.  For example, this occurs when the test
  could not be run, the test itself is invalid, or the test was interrupted.
 
-
-
 **UNSUPPORTED**
 
- The test is not supported in this environment. This is used by test formats
+ The test is not supported in this environment.  This is used by test formats
  which can report unsupported tests.
 
-
-
 Depending on the test format tests may produce additional information about
-their status (generally only for failures). See the Output|"OUTPUT OPTIONS"
+their status (generally only for failures).  See the :ref:`output-options`
 section for more information.
 
+.. _lit-infrastructure:
 
 LIT INFRASTRUCTURE
 ------------------
 
+This section describes the :program:`lit` testing architecture for users interested in
+creating a new :program:`lit` testing implementation, or extending an existing one.
 
-This section describes the **lit** testing architecture for users interested in
-creating a new **lit** testing implementation, or extending an existing one.
-
-**lit** proper is primarily an infrastructure for discovering and running
+:program:`lit` proper is primarily an infrastructure for discovering and running
 arbitrary tests, and to expose a single convenient interface to these
-tests. **lit** itself doesn't know how to run tests, rather this logic is
+tests. :program:`lit` itself doesn't know how to run tests, rather this logic is
 defined by *test suites*.
 
 TEST SUITES
 ~~~~~~~~~~~
 
-
-As described in "TEST DISCOVERY", tests are always located inside a *test
-suite*. Test suites serve to define the format of the tests they contain, the
+As described in :ref:`test-discovery`, tests are always located inside a *test
+suite*.  Test suites serve to define the format of the tests they contain, the
 logic for finding those tests, and any additional information to run the tests.
 
-**lit** identifies test suites as directories containing *lit.cfg* or
-*lit.site.cfg* files (see also **--config-prefix**). Test suites are initially
-discovered by recursively searching up the directory hierarchy for all the input
-files passed on the command line. You can use **--show-suites** to display the
-discovered test suites at startup.
+:program:`lit` identifies test suites as directories containing ``lit.cfg`` or
+``lit.site.cfg`` files (see also :option:`--config-prefix`).  Test suites are
+initially discovered by recursively searching up the directory hierarchy for
+all the input files passed on the command line.  You can use
+:option:`--show-suites` to display the discovered test suites at startup.
 
-Once a test suite is discovered, its config file is loaded. Config files
-themselves are Python modules which will be executed. When the config file is
+Once a test suite is discovered, its config file is loaded.  Config files
+themselves are Python modules which will be executed.  When the config file is
 executed, two important global variables are predefined:
 
-
 **lit**
 
  The global **lit** configuration object (a *LitConfig* instance), which defines
  the builtin test formats, global configuration parameters, and other helper
  routines for implementing test configurations.
 
-
-
 **config**
 
  This is the config object (a *TestingConfig* instance) for the test suite,
- which the config file is expected to populate. The following variables are also
+ which the config file is expected to populate.  The following variables are also
  available on the *config* object, some of which must be set by the config and
  others are optional or predefined:
 
@@ -353,135 +284,133 @@ executed, two important global variables are predefined:
  diagnostics.
 
  **test_format** *[required]* The test format object which will be used to
- discover and run tests in the test suite. Generally this will be a builtin test
+ discover and run tests in the test suite.  Generally this will be a builtin test
  format available from the *lit.formats* module.
 
- **test_src_root** The filesystem path to the test suite root. For out-of-dir
+ **test_src_root** The filesystem path to the test suite root.  For out-of-dir
  builds this is the directory that will be scanned for tests.
 
  **test_exec_root** For out-of-dir builds, the path to the test suite root inside
- the object directory. This is where tests will be run and temporary output files
+ the object directory.  This is where tests will be run and temporary output files
  placed.
 
  **environment** A dictionary representing the environment to use when executing
  tests in the suite.
 
  **suffixes** For **lit** test formats which scan directories for tests, this
- variable is a list of suffixes to identify test files. Used by: *ShTest*,
+ variable is a list of suffixes to identify test files.  Used by: *ShTest*,
  *TclTest*.
 
  **substitutions** For **lit** test formats which substitute variables into a test
- script, the list of substitutions to perform. Used by: *ShTest*, *TclTest*.
+ script, the list of substitutions to perform.  Used by: *ShTest*, *TclTest*.
 
  **unsupported** Mark an unsupported directory, all tests within it will be
- reported as unsupported. Used by: *ShTest*, *TclTest*.
+ reported as unsupported.  Used by: *ShTest*, *TclTest*.
 
  **parent** The parent configuration, this is the config object for the directory
  containing the test suite, or None.
 
- **root** The root configuration. This is the top-most **lit** configuration in
+ **root** The root configuration.  This is the top-most :program:`lit` configuration in
  the project.
 
  **on_clone** The config is actually cloned for every subdirectory inside a test
- suite, to allow local configuration on a per-directory basis. The *on_clone*
+ suite, to allow local configuration on a per-directory basis.  The *on_clone*
  variable can be set to a Python function which will be called whenever a
- configuration is cloned (for a subdirectory). The function should takes three
+ configuration is cloned (for a subdirectory).  The function should takes three
  arguments: (1) the parent configuration, (2) the new configuration (which the
  *on_clone* function will generally modify), and (3) the test path to the new
  directory being scanned.
 
-
-
-
 TEST DISCOVERY
 ~~~~~~~~~~~~~~
 
-
-Once test suites are located, **lit** recursively traverses the source directory
-(following *test_src_root*) looking for tests. When **lit** enters a
-sub-directory, it first checks to see if a nested test suite is defined in that
-directory. If so, it loads that test suite recursively, otherwise it
-instantiates a local test config for the directory (see "LOCAL CONFIGURATION
-FILES").
+Once test suites are located, :program:`lit` recursively traverses the source
+directory (following *test_src_root*) looking for tests.  When :program:`lit`
+enters a sub-directory, it first checks to see if a nested test suite is
+defined in that directory.  If so, it loads that test suite recursively,
+otherwise it instantiates a local test config for the directory (see
+:ref:`local-configuration-files`).
 
 Tests are identified by the test suite they are contained within, and the
-relative path inside that suite. Note that the relative path may not refer to an
-actual file on disk; some test formats (such as *GoogleTest*) define "virtual
-tests" which have a path that contains both the path to the actual test file and
-a subpath to identify the virtual test.
+relative path inside that suite.  Note that the relative path may not refer to
+an actual file on disk; some test formats (such as *GoogleTest*) define
+"virtual tests" which have a path that contains both the path to the actual
+test file and a subpath to identify the virtual test.
 
+.. _local-configuration-files:
 
 LOCAL CONFIGURATION FILES
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
-
-When **lit** loads a subdirectory in a test suite, it instantiates a local test
-configuration by cloning the configuration for the parent direction -- the root
-of this configuration chain will always be a test suite. Once the test
-configuration is cloned **lit** checks for a *lit.local.cfg* file in the
-subdirectory. If present, this file will be loaded and can be used to specialize
-the configuration for each individual directory. This facility can be used to
-define subdirectories of optional tests, or to change other configuration
-parameters -- for example, to change the test format, or the suffixes which
-identify test files.
-
+When :program:`lit` loads a subdirectory in a test suite, it instantiates a
+local test configuration by cloning the configuration for the parent direction
+--- the root of this configuration chain will always be a test suite.  Once the
+test configuration is cloned :program:`lit` checks for a *lit.local.cfg* file
+in the subdirectory.  If present, this file will be loaded and can be used to
+specialize the configuration for each individual directory.  This facility can
+be used to define subdirectories of optional tests, or to change other
+configuration parameters --- for example, to change the test format, or the
+suffixes which identify test files.
 
 TEST RUN OUTPUT FORMAT
 ~~~~~~~~~~~~~~~~~~~~~~
 
+The :program:`lit` output for a test run conforms to the following schema, in
+both short and verbose modes (although in short mode no PASS lines will be
+shown).  This schema has been chosen to be relatively easy to reliably parse by
+a machine (for example in buildbot log scraping), and for other tools to
+generate.
 
-The **lit** output for a test run conforms to the following schema, in both
-short and verbose modes (although in short mode no PASS lines will be shown).
-This schema has been chosen to be relatively easy to reliably parse by a machine
-(for example in buildbot log scraping), and for other tools to generate.
+Each test result is expected to appear on a line that matches:
 
-Each test result is expected to appear on a line that matches::
+.. code-block:: none
 
   <result code>: <test name> (<progress info>)
 
-where <result-code> is a standard test result such as PASS, FAIL, XFAIL, XPASS,
-UNRESOLVED, or UNSUPPORTED. The performance result codes of IMPROVED and
+where ``<result-code>`` is a standard test result such as PASS, FAIL, XFAIL,
+XPASS, UNRESOLVED, or UNSUPPORTED.  The performance result codes of IMPROVED and
 REGRESSED are also allowed.
 
-The <test name> field can consist of an arbitrary string containing no newline.
+The ``<test name>`` field can consist of an arbitrary string containing no
+newline.
 
-The <progress info> field can be used to report progress information such as
-(1/300) or can be empty, but even when empty the parentheses are required.
+The ``<progress info>`` field can be used to report progress information such
+as (1/300) or can be empty, but even when empty the parentheses are required.
 
 Each test result may include additional (multiline) log information in the
-following format::
+following format:
+
+.. code-block:: none
 
   <log delineator> TEST '(<test name>)' <trailing delineator>
   ... log message ...
   <log delineator>
 
-where <test name> should be the name of a preceding reported test, <log
-delineator> is a string of '\*' characters *at least* four characters long (the
-recommended length is 20), and <trailing delineator> is an arbitrary (unparsed)
-string.
+where ``<test name>`` should be the name of a preceding reported test, ``<log
+delineator>`` is a string of "*" characters *at least* four characters long
+(the recommended length is 20), and ``<trailing delineator>`` is an arbitrary
+(unparsed) string.
 
 The following is an example of a test run output which consists of four tests A,
-B, C, and D, and a log message for the failing test C::
+B, C, and D, and a log message for the failing test C:
+
+.. code-block:: none
 
   PASS: A (1 of 4)
   PASS: B (2 of 4)
   FAIL: C (3 of 4)
-  \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\* TEST 'C' FAILED \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*
+  ******************** TEST 'C' FAILED ********************
   Test 'C' failed as a result of exit code 1.
-  \*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*
+  ********************
   PASS: D (4 of 4)
 
-
 LIT EXAMPLE TESTS
 ~~~~~~~~~~~~~~~~~
 
-
-The **lit** distribution contains several example implementations of test suites
-in the *ExampleTests* directory.
-
+The :program:`lit` distribution contains several example implementations of
+test suites in the *ExampleTests* directory.
 
 SEE ALSO
 --------
 
-
 valgrind(1)
diff --git a/docs/CommandGuide/llc.rst b/docs/CommandGuide/llc.rst
index 6f1c486c3f..70354b0343 100644
--- a/docs/CommandGuide/llc.rst
+++ b/docs/CommandGuide/llc.rst
@@ -1,251 +1,187 @@
 llc - LLVM static compiler
 ==========================
 
-
 SYNOPSIS
 --------
 
-
-**llc** [*options*] [*filename*]
-
+:program:`llc` [*options*] [*filename*]
 
 DESCRIPTION
 -----------
 
-
-The **llc** command compiles LLVM source inputs into assembly language for a
-specified architecture.  The assembly language output can then be passed through
-a native assembler and linker to generate a native executable.
+The :program:`llc` command compiles LLVM source inputs into assembly language
+for a specified architecture.  The assembly language output can then be passed
+through a native assembler and linker to generate a native executable.
 
 The choice of architecture for the output assembly code is automatically
-determined from the input file, unless the **-march** option is used to override
-the default.
-
+determined from the input file, unless the :option:`-march` option is used to
+override the default.
 
 OPTIONS
 -------
 
+If ``filename`` is "``-``" or omitted, :program:`llc` reads from standard input.
+Otherwise, it will from ``filename``.  Inputs can be in either the LLVM assembly
+language format (``.ll``) or the LLVM bitcode format (``.bc``).
 
-If *filename* is - or omitted, **llc** reads from standard input.  Otherwise, it
-will from *filename*.  Inputs can be in either the LLVM assembly language
-format (.ll) or the LLVM bitcode format (.bc).
+If the :option:`-o` option is omitted, then :program:`llc` will send its output
+to standard output if the input is from standard input.  If the :option:`-o`
+option specifies "``-``", then the output will also be sent to standard output.
 
-If the **-o** option is omitted, then **llc** will send its output to standard
-output if the input is from standard input.  If the **-o** option specifies -,
-then the output will also be sent to standard output.
+If no :option:`-o` option is specified and an input file other than "``-``" is
+specified, then :program:`llc` creates the output filename by taking the input
+filename, removing any existing ``.bc`` extension, and adding a ``.s`` suffix.
 
-If no **-o** option is specified and an input file other than - is specified,
-then **llc** creates the output filename by taking the input filename,
-removing any existing *.bc* extension, and adding a *.s* suffix.
-
-Other **llc** options are as follows:
+Other :program:`llc` options are described below.
 
 End-user Options
 ~~~~~~~~~~~~~~~~
 
-
-
-**-help**
+.. option:: -help
 
  Print a summary of command line options.
 
+.. option:: -O=uint
 
+ Generate code at different optimization levels.  These correspond to the
+ ``-O0``, ``-O1``, ``-O2``, and ``-O3`` optimization levels used by
+ :program:`llvm-gcc` and :program:`clang`.
 
-**-O**\ =\ *uint*
-
- Generate code at different optimization levels. These correspond to the *-O0*,
- *-O1*, *-O2*, and *-O3* optimization levels used by **llvm-gcc** and
- **clang**.
-
-
-
-**-mtriple**\ =\ *target triple*
+.. option:: -mtriple=<target triple>
 
  Override the target triple specified in the input file with the specified
  string.
 
-
-
-**-march**\ =\ *arch*
+.. option:: -march=<arch>
 
  Specify the architecture for which to generate assembly, overriding the target
- encoded in the input file.  See the output of **llc -help** for a list of
+ encoded in the input file.  See the output of ``llc -help`` for a list of
  valid architectures.  By default this is inferred from the target triple or
  autodetected to the current architecture.
 
-
-
-**-mcpu**\ =\ *cpuname*
+.. option:: -mcpu=<cpuname>
 
  Specify a specific chip in the current architecture to generate code for.
  By default this is inferred from the target triple and autodetected to
  the current architecture.  For a list of available CPUs, use:
- **llvm-as < /dev/null | llc -march=xyz -mcpu=help**
 
+ .. code-block:: none
 
+   llvm-as < /dev/null | llc -march=xyz -mcpu=help
 
-**-mattr**\ =\ *a1,+a2,-a3,...*
+.. option:: -mattr=a1,+a2,-a3,...
 
  Override or control specific attributes of the target, such as whether SIMD
  operations are enabled or not.  The default set of attributes is set by the
  current CPU.  For a list of available attributes, use:
- **llvm-as < /dev/null | llc -march=xyz -mattr=help**
 
+ .. code-block:: none
 
+   llvm-as < /dev/null | llc -march=xyz -mattr=help
 
-**--disable-fp-elim**
+.. option:: --disable-fp-elim
 
  Disable frame pointer elimination optimization.
 
-
-
-**--disable-excess-fp-precision**
+.. option:: --disable-excess-fp-precision
 
  Disable optimizations that may produce excess precision for floating point.
  Note that this option can dramatically slow down code on some systems
  (e.g. X86).
 
-
-
-**--enable-no-infs-fp-math**
+.. option:: --enable-no-infs-fp-math
 
  Enable optimizations that assume no Inf values.
 
-
-
-**--enable-no-nans-fp-math**
+.. option:: --enable-no-nans-fp-math
 
  Enable optimizations that assume no NAN values.
 
-
-
-**--enable-unsafe-fp-math**
+.. option:: --enable-unsafe-fp-math
 
  Enable optimizations that make unsafe assumptions about IEEE math (e.g. that
  addition is associative) or may not work for all input ranges.  These
  optimizations allow the code generator to make use of some instructions which
- would otherwise not be usable (such as fsin on X86).
-
+ would otherwise not be usable (such as ``fsin`` on X86).
 
+.. option:: --enable-correct-eh-support
 
-**--enable-correct-eh-support**
+ Instruct the **lowerinvoke** pass to insert code for correct exception
+ handling support.  This is expensive and is by default omitted for efficiency.
 
- Instruct the **lowerinvoke** pass to insert code for correct exception handling
- support.  This is expensive and is by default omitted for efficiency.
-
-
-
-**--stats**
+.. option:: --stats
 
  Print statistics recorded by code-generation passes.
 
-
-
-**--time-passes**
+.. option:: --time-passes
 
  Record the amount of time needed for each pass and print a report to standard
  error.
 
+.. option:: --load=<dso_path>
 
-
-**--load**\ =\ *dso_path*
-
- Dynamically load *dso_path* (a path to a dynamically shared object) that
- implements an LLVM target. This will permit the target name to be used with the
- **-march** option so that code can be generated for that target.
-
-
-
+ Dynamically load ``dso_path`` (a path to a dynamically shared object) that
+ implements an LLVM target.  This will permit the target name to be used with
+ the :option:`-march` option so that code can be generated for that target.
 
 Tuning/Configuration Options
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-
-
-**--print-machineinstrs**
+.. option:: --print-machineinstrs
 
  Print generated machine code between compilation phases (useful for debugging).
 
+.. option:: --regalloc=<allocator>
 
-
-**--regalloc**\ =\ *allocator*
-
- Specify the register allocator to use. The default *allocator* is *local*.
+ Specify the register allocator to use.  The default ``allocator`` is *local*.
  Valid register allocators are:
 
-
  *simple*
 
   Very simple "always spill" register allocator
 
-
-
  *local*
 
   Local register allocator
 
-
-
  *linearscan*
 
   Linear scan global register allocator
 
-
-
  *iterativescan*
 
   Iterative scan global register allocator
 
-
-
-
-
-**--spiller**\ =\ *spiller*
+.. option:: --spiller=<spiller>
 
  Specify the spiller to use for register allocators that support it.  Currently
- this option is used only by the linear scan register allocator. The default
- *spiller* is *local*.  Valid spillers are:
-
+ this option is used only by the linear scan register allocator.  The default
+ ``spiller`` is *local*.  Valid spillers are:
 
  *simple*
 
   Simple spiller
 
-
-
  *local*
 
   Local spiller
 
-
-
-
-
-
 Intel IA-32-specific Options
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+.. option:: --x86-asm-syntax=[att|intel]
 
-
-**--x86-asm-syntax=att|intel**
-
- Specify whether to emit assembly code in AT&T syntax (the default) or intel
+ Specify whether to emit assembly code in AT&T syntax (the default) or Intel
  syntax.
 
-
-
-
-
 EXIT STATUS
 -----------
 
-
-If **llc** succeeds, it will exit with 0.  Otherwise, if an error occurs,
-it will exit with a non-zero value.
-
+If :program:`llc` succeeds, it will exit with 0.  Otherwise, if an error
+occurs, it will exit with a non-zero value.
 
 SEE ALSO
 --------
 
+lli
 
-lli|lli
diff --git a/docs/CommandGuide/llvm-cov.rst b/docs/CommandGuide/llvm-cov.rst
index 09275f6af7..524f24087f 100644
--- a/docs/CommandGuide/llvm-cov.rst
+++ b/docs/CommandGuide/llvm-cov.rst
@@ -1,51 +1,39 @@
 llvm-cov - emit coverage information
 ====================================
 
-
 SYNOPSIS
 --------
 
-
-**llvm-cov** [-gcno=filename] [-gcda=filename] [dump]
-
+:program:`llvm-cov` [-gcno=filename] [-gcda=filename] [dump]
 
 DESCRIPTION
 -----------
 
-
-The experimental **llvm-cov** tool reads in description file generated by compiler
-and coverage data file generated by instrumented program. This program assumes
-that the description and data file uses same format as gcov files.
-
+The experimental :program:`llvm-cov` tool reads in description file generated
+by compiler and coverage data file generated by instrumented program.  This
+program assumes that the description and data file uses same format as gcov
+files.
 
 OPTIONS
 -------
 
+.. option:: -gcno=filename
 
+ This option selects input description file generated by compiler while
+ instrumenting program.
 
-**-gcno=filename]**
-
- This option selects input description file generated by compiler while instrumenting
- program.
-
-
-
-**-gcda=filename]**
+.. option:: -gcda=filename
 
  This option selects coverage data file generated by instrumented compiler.
 
+.. option:: -dump
 
-
-**-dump**
-
- This options enables output dump that is suitable for a developer to help debug
- **llvm-cov** itself.
-
-
-
+ This options enables output dump that is suitable for a developer to help
+ debug :program:`llvm-cov` itself.
 
 EXIT STATUS
 -----------
 
+:program:`llvm-cov` returns 1 if it cannot read input files.  Otherwise, it
+exits with zero.
 
-**llvm-cov** returns 1 if it cannot read input files. Otherwise, it exits with zero.
diff --git a/docs/CommandGuide/llvm-link.rst b/docs/CommandGuide/llvm-link.rst
index 63019d7cca..e4f2228841 100644
--- a/docs/CommandGuide/llvm-link.rst
+++ b/docs/CommandGuide/llvm-link.rst
@@ -1,96 +1,74 @@
 llvm-link - LLVM linker
 =======================
 
-
 SYNOPSIS
 --------
 
-
-**llvm-link** [*options*] *filename ...*
-
+:program:`llvm-link` [*options*] *filename ...*
 
 DESCRIPTION
 -----------
 
+:program:`llvm-link` takes several LLVM bitcode files and links them together
+into a single LLVM bitcode file.  It writes the output file to standard output,
+unless the :option:`-o` option is used to specify a filename.
 
-**llvm-link** takes several LLVM bitcode files and links them together into a
-single LLVM bitcode file.  It writes the output file to standard output, unless
-the **-o** option is used to specify a filename.
-
-**llvm-link** attempts to load the input files from the current directory.  If
-that fails, it looks for each file in each of the directories specified by the
-**-L** options on the command line.  The library search paths are global; each
-one is searched for every input file if necessary.  The directories are searched
-in the order they were specified on the command line.
-
+:program:`llvm-link` attempts to load the input files from the current
+directory.  If that fails, it looks for each file in each of the directories
+specified by the :option:`-L` options on the command line.  The library search
+paths are global; each one is searched for every input file if necessary.  The
+directories are searched in the order they were specified on the command line.
 
 OPTIONS
 -------
 
+.. option:: -L directory
 
+ Add the specified ``directory`` to the library search path.  When looking for
+ libraries, :program:`llvm-link` will look in path name for libraries.  This
+ option can be specified multiple times; :program:`llvm-link` will search
+ inside these directories in the order in which they were specified on the
+ command line.
 
-**-L** *directory*
-
- Add the specified *directory* to the library search path.  When looking for
- libraries, **llvm-link** will look in path name for libraries.  This option can be
- specified multiple times; **llvm-link** will search inside these directories in
- the order in which they were specified on the command line.
-
-
-
-**-f**
-
- Enable binary output on terminals.  Normally, **llvm-link** will refuse to
- write raw bitcode output if the output stream is a terminal. With this option,
- **llvm-link** will write raw bitcode regardless of the output device.
-
-
+.. option:: -f
 
-**-o** *filename*
+ Enable binary output on terminals.  Normally, :program:`llvm-link` will refuse
+ to write raw bitcode output if the output stream is a terminal. With this
+ option, :program:`llvm-link` will write raw bitcode regardless of the output
+ device.
 
- Specify the output file name.  If *filename* is ``-``, then **llvm-link** will
- write its output to standard output.
+.. option:: -o filename
 
+ Specify the output file name.  If ``filename`` is "``-``", then
+ :program:`llvm-link` will write its output to standard output.
 
-
-**-S**
+.. option:: -S
 
  Write output in LLVM intermediate language (instead of bitcode).
 
+.. option:: -d
 
-
-**-d**
-
- If specified, **llvm-link** prints a human-readable version of the output
+ If specified, :program:`llvm-link` prints a human-readable version of the output
  bitcode file to standard error.
 
-
-
-**-help**
+.. option:: -help
 
  Print a summary of command line options.
 
+.. option:: -v
 
-
-**-v**
-
- Verbose mode.  Print information about what **llvm-link** is doing.  This
- typically includes a message for each bitcode file linked in and for each
+ Verbose mode.  Print information about what :program:`llvm-link` is doing.
+ This typically includes a message for each bitcode file linked in and for each
  library found.
 
-
-
-
 EXIT STATUS
 -----------
 
-
-If **llvm-link** succeeds, it will exit with 0.  Otherwise, if an error
+If :program:`llvm-link` succeeds, it will exit with 0.  Otherwise, if an error
 occurs, it will exit with a non-zero value.
 
-
 SEE ALSO
 --------
 
+gccld
 
-gccld|gccld
diff --git a/docs/CommandGuide/llvm-stress.rst b/docs/CommandGuide/llvm-stress.rst
index 44aa32c755..fb006f562b 100644
--- a/docs/CommandGuide/llvm-stress.rst
+++ b/docs/CommandGuide/llvm-stress.rst
@@ -1,48 +1,34 @@
 llvm-stress - generate random .ll files
 =======================================
 
-
 SYNOPSIS
 --------
 
-
-**llvm-stress** [-size=filesize] [-seed=initialseed] [-o=outfile]
-
+:program:`llvm-stress` [-size=filesize] [-seed=initialseed] [-o=outfile]
 
 DESCRIPTION
 -----------
 
-
-The **llvm-stress** tool is used to generate random .ll files that can be used to
-test different components of LLVM.
-
+The :program:`llvm-stress` tool is used to generate random ``.ll`` files that
+can be used to test different components of LLVM.
 
 OPTIONS
 -------
 
-
-
-**-o** *filename*
+.. option:: -o filename
 
  Specify the output filename.
 
+.. option:: -size size
 
+ Specify the size of the generated ``.ll`` file.
 
-**-size** *size*
-
- Specify the size of the generated .ll file.
-
-
-
-**-seed** *seed*
+.. option:: -seed seed
 
  Specify the seed to be used for the randomly generated instructions.
 
-
-
-
 EXIT STATUS
 -----------
 
+:program:`llvm-stress` returns 0.
 
-**llvm-stress** returns 0.
diff --git a/docs/CommandGuide/opt.rst b/docs/CommandGuide/opt.rst
index 72f19034c9..179c297c22 100644
--- a/docs/CommandGuide/opt.rst
+++ b/docs/CommandGuide/opt.rst
@@ -1,183 +1,143 @@
 opt - LLVM optimizer
 ====================
 
-
 SYNOPSIS
 --------
 
-
-**opt** [*options*] [*filename*]
-
+:program:`opt` [*options*] [*filename*]
 
 DESCRIPTION
 -----------
 
+The :program:`opt` command is the modular LLVM optimizer and analyzer.  It
+takes LLVM source files as input, runs the specified optimizations or analyses
+on it, and then outputs the optimized file or the analysis results.  The
+function of :program:`opt` depends on whether the :option:`-analyze` option is
+given.
 
-The **opt** command is the modular LLVM optimizer and analyzer.  It takes LLVM
-source files as input, runs the specified optimizations or analyses on it, and then
-outputs the optimized file or the analysis results.  The function of
-**opt** depends on whether the **-analyze** option is given.
-
-When **-analyze** is specified, **opt** performs various analyses of the input
-source.  It will usually print the results on standard output, but in a few
-cases, it will print output to standard error or generate a file with the
-analysis output, which is usually done when the output is meant for another
+When :option:`-analyze` is specified, :program:`opt` performs various analyses
+of the input source.  It will usually print the results on standard output, but
+in a few cases, it will print output to standard error or generate a file with
+the analysis output, which is usually done when the output is meant for another
 program.
 
-While **-analyze** is *not* given, **opt** attempts to produce an optimized
-output file.  The optimizations available via **opt** depend upon what
-libraries were linked into it as well as any additional libraries that have
-been loaded with the **-load** option.  Use the **-help** option to determine
-what optimizations you can use.
-
-If *filename* is omitted from the command line or is *-*, **opt** reads its
-input from standard input. Inputs can be in either the LLVM assembly language
-format (.ll) or the LLVM bitcode format (.bc).
+While :option:`-analyze` is *not* given, :program:`opt` attempts to produce an
+optimized output file.  The optimizations available via :program:`opt` depend
+upon what libraries were linked into it as well as any additional libraries
+that have been loaded with the :option:`-load` option.  Use the :option:`-help`
+option to determine what optimizations you can use.
 
-If an output filename is not specified with the **-o** option, **opt**
-writes its output to the standard output.
+If ``filename`` is omitted from the command line or is "``-``", :program:`opt`
+reads its input from standard input.  Inputs can be in either the LLVM assembly
+language format (``.ll``) or the LLVM bitcode format (``.bc``).
 
+If an output filename is not specified with the :option:`-o` option,
+:program:`opt` writes its output to the standard output.
 
 OPTIONS
 -------
 
+.. option:: -f
 
+ Enable binary output on terminals.  Normally, :program:`opt` will refuse to
+ write raw bitcode output if the output stream is a terminal.  With this option,
+ :program:`opt` will write raw bitcode regardless of the output device.
 
-**-f**
-
- Enable binary output on terminals.  Normally, **opt** will refuse to
- write raw bitcode output if the output stream is a terminal. With this option,
- **opt** will write raw bitcode regardless of the output device.
-
-
-
-**-help**
+.. option:: -help
 
  Print a summary of command line options.
 
-
-
-**-o** *filename*
+.. option:: -o <filename>
 
  Specify the output filename.
 
-
-
-**-S**
+.. option:: -S
 
  Write output in LLVM intermediate language (instead of bitcode).
 
+.. option:: -{passname}
 
+ :program:`opt` provides the ability to run any of LLVM's optimization or
+ analysis passes in any order.  The :option:`-help` option lists all the passes
+ available.  The order in which the options occur on the command line are the
+ order in which they are executed (within pass constraints).
 
-**-{passname}**
-
- **opt** provides the ability to run any of LLVM's optimization or analysis passes
- in any order. The **-help** option lists all the passes available. The order in
- which the options occur on the command line are the order in which they are
- executed (within pass constraints).
-
-
-
-**-std-compile-opts**
+.. option:: -std-compile-opts
 
  This is short hand for a standard list of *compile time optimization* passes.
- This is typically used to optimize the output from the llvm-gcc front end. It
- might be useful for other front end compilers as well. To discover the full set
- of options available, use the following command:
-
+ This is typically used to optimize the output from the llvm-gcc front end.  It
+ might be useful for other front end compilers as well.  To discover the full
+ set of options available, use the following command:
 
  .. code-block:: sh
 
      llvm-as < /dev/null | opt -std-compile-opts -disable-output -debug-pass=Arguments
 
+.. option:: -disable-inlining
 
+ This option is only meaningful when :option:`-std-compile-opts` is given.  It
+ simply removes the inlining pass from the standard list.
 
+.. option:: -disable-opt
 
-**-disable-inlining**
-
- This option is only meaningful when **-std-compile-opts** is given. It simply
- removes the inlining pass from the standard list.
-
-
-
-**-disable-opt**
-
- This option is only meaningful when **-std-compile-opts** is given. It disables
- most, but not all, of the **-std-compile-opts**. The ones that remain are
- **-verify**, **-lower-setjmp**, and **-funcresolve**.
+ This option is only meaningful when :option:`-std-compile-opts` is given.  It
+ disables most, but not all, of the :option:`-std-compile-opts`.  The ones that
+ remain are :option:`-verify`, :option:`-lower-setjmp`, and
+ :option:`-funcresolve`.
 
-
-
-**-strip-debug**
+.. option:: -strip-debug
 
  This option causes opt to strip debug information from the module before
- applying other optimizations. It is essentially the same as **-strip** but it
- ensures that stripping of debug information is done first.
-
-
-
-**-verify-each**
-
- This option causes opt to add a verify pass after every pass otherwise specified
- on the command line (including **-verify**).  This is useful for cases where it
- is suspected that a pass is creating an invalid module but it is not clear which
- pass is doing it. The combination of **-std-compile-opts** and **-verify-each**
- can quickly track down this kind of problem.
+ applying other optimizations.  It is essentially the same as :option:`-strip`
+ but it ensures that stripping of debug information is done first.
 
+.. option:: -verify-each
 
+ This option causes opt to add a verify pass after every pass otherwise
+ specified on the command line (including :option:`-verify`).  This is useful
+ for cases where it is suspected that a pass is creating an invalid module but
+ it is not clear which pass is doing it.  The combination of
+ :option:`-std-compile-opts` and :option:`-verify-each` can quickly track down
+ this kind of problem.
 
-**-profile-info-file** *filename*
+.. option:: -profile-info-file <filename>
 
- Specify the name of the file loaded by the -profile-loader option.
+ Specify the name of the file loaded by the ``-profile-loader`` option.
 
-
-
-**-stats**
+.. option:: -stats
 
  Print statistics.
 
-
-
-**-time-passes**
+.. option:: -time-passes
 
  Record the amount of time needed for each pass and print it to standard
  error.
 
+.. option:: -debug
 
+ If this is a debug build, this option will enable debug printouts from passes
+ which use the ``DEBUG()`` macro.  See the `LLVM Programmer's Manual
+ <../ProgrammersManual.html>`_, section ``#DEBUG`` for more information.
 
-**-debug**
-
- If this is a debug build, this option will enable debug printouts
- from passes which use the *DEBUG()* macro.  See the **LLVM Programmer's
- Manual**, section *#DEBUG* for more information.
-
-
-
-**-load**\ =\ *plugin*
-
- Load the dynamic object *plugin*.  This object should register new optimization
- or analysis passes. Once loaded, the object will add new command line options to
- enable various optimizations or analyses.  To see the new complete list of
- optimizations, use the **-help** and **-load** options together. For example:
+.. option:: -load=<plugin>
 
+ Load the dynamic object ``plugin``.  This object should register new
+ optimization or analysis passes.  Once loaded, the object will add new command
+ line options to enable various optimizations or analyses.  To see the new
+ complete list of optimizations, use the :option:`-help` and :option:`-load`
+ options together.  For example:
 
  .. code-block:: sh
 
      opt -load=plugin.so -help
 
-
-
-
-**-p**
+.. option:: -p
 
  Print module after each transformation.
 
-
-
-
 EXIT STATUS
 -----------
 
-
-If **opt** succeeds, it will exit with 0.  Otherwise, if an error
+If :program:`opt` succeeds, it will exit with 0.  Otherwise, if an error
 occurs, it will exit with a non-zero value.
+
diff --git a/docs/CommandGuide/tblgen.rst b/docs/CommandGuide/tblgen.rst
index 2d191676d9..1858ee447d 100644
--- a/docs/CommandGuide/tblgen.rst
+++ b/docs/CommandGuide/tblgen.rst
@@ -1,186 +1,129 @@
 tblgen - Target Description To C++ Code Generator
 =================================================
 
-
 SYNOPSIS
 --------
 
-
-**tblgen** [*options*] [*filename*]
-
+:program:`tblgen` [*options*] [*filename*]
 
 DESCRIPTION
 -----------
 
+:program:`tblgen` translates from target description (``.td``) files into C++
+code that can be included in the definition of an LLVM target library.  Most
+users of LLVM will not need to use this program.  It is only for assisting with
+writing an LLVM target backend.
 
-**tblgen** translates from target description (.td) files into C++ code that can
-be included in the definition of an LLVM target library. Most users of LLVM will
-not need to use this program. It is only for assisting with writing an LLVM
-target backend.
-
-The input and output of **tblgen** is beyond the scope of this short
-introduction. Please see the *CodeGeneration* page in the LLVM documentation.
-
-The *filename* argument specifies the name of a Target Description (.td) file
-to read as input.
+The input and output of :program:`tblgen` is beyond the scope of this short
+introduction.  Please see :doc:`../TableGenFundamentals`.
 
+The *filename* argument specifies the name of a Target Description (``.td``)
+file to read as input.
 
 OPTIONS
 -------
 
-
-
-**-help**
+.. option:: -help
 
  Print a summary of command line options.
 
+.. option:: -o filename
 
+ Specify the output file name.  If ``filename`` is ``-``, then
+ :program:`tblgen` sends its output to standard output.
 
-**-o** *filename*
-
- Specify the output file name.  If *filename* is ``-``, then **tblgen**
- sends its output to standard output.
-
-
-
-**-I** *directory*
-
- Specify where to find other target description files for inclusion. The
- *directory* value should be a full or partial path to a directory that contains
- target description files.
-
-
-
-**-asmparsernum** *N*
+.. option:: -I directory
 
- Make -gen-asm-parser emit assembly writer number *N*.
+ Specify where to find other target description files for inclusion.  The
+ ``directory`` value should be a full or partial path to a directory that
+ contains target description files.
 
+.. option:: -asmparsernum N
 
+ Make -gen-asm-parser emit assembly writer number ``N``.
 
-**-asmwriternum** *N*
+.. option:: -asmwriternum N
 
- Make -gen-asm-writer emit assembly writer number *N*.
+ Make -gen-asm-writer emit assembly writer number ``N``.
 
-
-
-**-class** *class Name*
+.. option:: -class className
 
  Print the enumeration list for this class.
 
-
-
-**-print-records**
+.. option:: -print-records
 
  Print all records to standard output (default).
 
-
-
-**-print-enums**
+.. option:: -print-enums
 
  Print enumeration values for a class
 
-
-
-**-print-sets**
+.. option:: -print-sets
 
  Print expanded sets for testing DAG exprs.
 
-
-
-**-gen-emitter**
+.. option:: -gen-emitter
 
  Generate machine code emitter.
 
-
-
-**-gen-register-info**
+.. option:: -gen-register-info
 
  Generate registers and register classes info.
 
-
-
-**-gen-instr-info**
+.. option:: -gen-instr-info
 
  Generate instruction descriptions.
 
-
-
-**-gen-asm-writer**
+.. option:: -gen-asm-writer
 
  Generate the assembly writer.
 
-
-
-**-gen-disassembler**
+.. option:: -gen-disassembler
 
  Generate disassembler.
 
-
-
-**-gen-pseudo-lowering**
+.. option:: -gen-pseudo-lowering
 
  Generate pseudo instruction lowering.
 
-
-
-**-gen-dag-isel**
+.. option:: -gen-dag-isel
 
  Generate a DAG (Directed Acycle Graph) instruction selector.
 
-
-
-**-gen-asm-matcher**
+.. option:: -gen-asm-matcher
 
  Generate assembly instruction matcher.
 
-
-
-**-gen-dfa-packetizer**
+.. option:: -gen-dfa-packetizer
 
  Generate DFA Packetizer for VLIW targets.
 
-
-
-**-gen-fast-isel**
+.. option:: -gen-fast-isel
 
  Generate a "fast" instruction selector.
 
-
-
-**-gen-subtarget**
+.. option:: -gen-subtarget
 
  Generate subtarget enumerations.
 
-
-
-**-gen-intrinsic**
+.. option:: -gen-intrinsic
 
  Generate intrinsic information.
 
-
-
-**-gen-tgt-intrinsic**
+.. option:: -gen-tgt-intrinsic
 
  Generate target intrinsic information.
 
-
-
-**-gen-enhanced-disassembly-info**
+.. option:: -gen-enhanced-disassembly-info
 
  Generate enhanced disassembly info.
 
-
-
-**-version**
+.. option:: -version
 
  Show the version number of this program.
 
-
-
-
 EXIT STATUS
 -----------
 
-
-If **tblgen** succeeds, it will exit with 0.  Otherwise, if an error
+If :program:`tblgen` succeeds, it will exit with 0.  Otherwise, if an error
 occurs, it will exit with a non-zero value.
diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst
index 390901289d..925e769b86 100644
--- a/docs/DeveloperPolicy.rst
+++ b/docs/DeveloperPolicy.rst
@@ -26,8 +26,8 @@ This policy is also designed to accomplish the following objectives:
 
 #. Keep the top of Subversion trees as stable as possible.
 
-#. Establish awareness of the project's `copyright, license, and patent
-   policies`_ with contributors to the project.
+#. Establish awareness of the project's :ref:`copyright, license, and patent
+   policies <copyright-license-patents>` with contributors to the project.
 
 This policy is aimed at frequent contributors to LLVM. People interested in
 contributing one-off patches can do so in an informal way by sending them to the
@@ -401,7 +401,7 @@ Hacker!" in the commit message.
 
 Overall, please do not add contributor names to the source code.
 
-.. _copyright, license, and patent policies:
+.. _copyright-license-patents:
 
 Copyright, License, and Patents
 ===============================
diff --git a/docs/GarbageCollection.html b/docs/GarbageCollection.html
deleted file mode 100644
index 5bc70f1bb0..0000000000
--- a/docs/GarbageCollection.html
+++ /dev/null
@@ -1,1389 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
-  <meta http-equiv="Content-Type" Content="text/html; charset=UTF-8" >
-  <title>Accurate Garbage Collection with LLVM</title>
-  <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-  <style type="text/css">
-    .rowhead { text-align: left; background: inherit; }
-    .indent { padding-left: 1em; }
-    .optl { color: #BFBFBF; }
-  </style>
-</head>
-<body>
-
-<h1>
-  Accurate Garbage Collection with LLVM
-</h1>
-
-<ol>
-  <li><a href="#introduction">Introduction</a>
-    <ul>
-    <li><a href="#feature">Goals and non-goals</a></li>
-    </ul>
-  </li>
-
-  <li><a href="#quickstart">Getting started</a>
-    <ul>
-    <li><a href="#quickstart-compiler">In your compiler</a></li>
-    <li><a href="#quickstart-runtime">In your runtime library</a></li>
-    <li><a href="#shadow-stack">About the shadow stack</a></li>
-    </ul>
-  </li>
-
-  <li><a href="#core">Core support</a>
-    <ul>
-    <li><a href="#gcattr">Specifying GC code generation:
-      <tt>gc "..."</tt></a></li>
-    <li><a href="#gcroot">Identifying GC roots on the stack:
-      <tt>llvm.gcroot</tt></a></li>
-    <li><a href="#barriers">Reading and writing references in the heap</a>
-      <ul>
-      <li><a href="#gcwrite">Write barrier: <tt>llvm.gcwrite</tt></a></li>
-      <li><a href="#gcread">Read barrier: <tt>llvm.gcread</tt></a></li>
-      </ul>
-    </li>
-    </ul>
-  </li>
-  
-  <li><a href="#plugin">Compiler plugin interface</a>
-    <ul>
-    <li><a href="#collector-algos">Overview of available features</a></li>
-    <li><a href="#stack-map">Computing stack maps</a></li>
-    <li><a href="#init-roots">Initializing roots to null:
-      <tt>InitRoots</tt></a></li>
-    <li><a href="#custom">Custom lowering of intrinsics: <tt>CustomRoots</tt>, 
-      <tt>CustomReadBarriers</tt>, and <tt>CustomWriteBarriers</tt></a></li>
-    <li><a href="#safe-points">Generating safe points:
-      <tt>NeededSafePoints</tt></a></li>
-    <li><a href="#assembly">Emitting assembly code:
-      <tt>GCMetadataPrinter</tt></a></li>
-    </ul>
-  </li>
-
-  <li><a href="#runtime-impl">Implementing a collector runtime</a>
-    <ul>
-      <li><a href="#gcdescriptors">Tracing GC pointers from heap
-      objects</a></li>
-    </ul>
-  </li>
-  
-  <li><a href="#references">References</a></li>
-  
-</ol>
-
-<div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a> and
-     Gordon Henriksen</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="introduction">Introduction</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Garbage collection is a widely used technique that frees the programmer from
-having to know the lifetimes of heap objects, making software easier to produce
-and maintain. Many programming languages rely on garbage collection for
-automatic memory management. There are two primary forms of garbage collection:
-conservative and accurate.</p>
-
-<p>Conservative garbage collection often does not require any special support
-from either the language or the compiler: it can handle non-type-safe
-programming languages (such as C/C++) and does not require any special
-information from the compiler. The
-<a href="http://www.hpl.hp.com/personal/Hans_Boehm/gc/">Boehm collector</a> is
-an example of a state-of-the-art conservative collector.</p>
-
-<p>Accurate garbage collection requires the ability to identify all pointers in
-the program at run-time (which requires that the source-language be type-safe in
-most cases). Identifying pointers at run-time requires compiler support to
-locate all places that hold live pointer variables at run-time, including the
-<a href="#gcroot">processor stack and registers</a>.</p>
-
-<p>Conservative garbage collection is attractive because it does not require any
-special compiler support, but it does have problems. In particular, because the
-conservative garbage collector cannot <i>know</i> that a particular word in the
-machine is a pointer, it cannot move live objects in the heap (preventing the
-use of compacting and generational GC algorithms) and it can occasionally suffer
-from memory leaks due to integer values that happen to point to objects in the
-program. In addition, some aggressive compiler transformations can break
-conservative garbage collectors (though these seem rare in practice).</p>
-
-<p>Accurate garbage collectors do not suffer from any of these problems, but
-they can suffer from degraded scalar optimization of the program. In particular,
-because the runtime must be able to identify and update all pointers active in
-the program, some optimizations are less effective. In practice, however, the
-locality and performance benefits of using aggressive garbage collection
-techniques dominates any low-level losses.</p>
-
-<p>This document describes the mechanisms and interfaces provided by LLVM to
-support accurate garbage collection.</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="feature">Goals and non-goals</a>
-</h3>
-
-<div>
-
-<p>LLVM's intermediate representation provides <a href="#intrinsics">garbage
-collection intrinsics</a> that offer support for a broad class of
-collector models. For instance, the intrinsics permit:</p>
-
-<ul>
-  <li>semi-space collectors</li>
-  <li>mark-sweep collectors</li>
-  <li>generational collectors</li>
-  <li>reference counting</li>
-  <li>incremental collectors</li>
-  <li>concurrent collectors</li>
-  <li>cooperative collectors</li>
-</ul>
-
-<p>We hope that the primitive support built into the LLVM IR is sufficient to
-support a broad class of garbage collected languages including Scheme, ML, Java,
-C#, Perl, Python, Lua, Ruby, other scripting languages, and more.</p>
-
-<p>However, LLVM does not itself provide a garbage collector&mdash;this should
-be part of your language's runtime library. LLVM provides a framework for
-compile time <a href="#plugin">code generation plugins</a>. The role of these
-plugins is to generate code and data structures which conforms to the <em>binary
-interface</em> specified by the <em>runtime library</em>. This is similar to the
-relationship between LLVM and DWARF debugging info, for example. The
-difference primarily lies in the lack of an established standard in the domain
-of garbage collection&mdash;thus the plugins.</p>
-
-<p>The aspects of the binary interface with which LLVM's GC support is
-concerned are:</p>
-
-<ul>
-  <li>Creation of GC-safe points within code where collection is allowed to
-      execute safely.</li>
-  <li>Computation of the stack map. For each safe point in the code, object
-      references within the stack frame must be identified so that the
-      collector may traverse and perhaps update them.</li>
-  <li>Write barriers when storing object references to the heap. These are
-      commonly used to optimize incremental scans in generational
-      collectors.</li>
-  <li>Emission of read barriers when loading object references. These are
-      useful for interoperating with concurrent collectors.</li>
-</ul>
-
-<p>There are additional areas that LLVM does not directly address:</p>
-
-<ul>
-  <li>Registration of global roots with the runtime.</li>
-  <li>Registration of stack map entries with the runtime.</li>
-  <li>The functions used by the program to allocate memory, trigger a
-      collection, etc.</li>
-  <li>Computation or compilation of type maps, or registration of them with
-      the runtime. These are used to crawl the heap for object
-      references.</li>
-</ul>
-
-<p>In general, LLVM's support for GC does not include features which can be
-adequately addressed with other features of the IR and does not specify a
-particular binary interface. On the plus side, this means that you should be
-able to integrate LLVM with an existing runtime. On the other hand, it leaves
-a lot of work for the developer of a novel language. However, it's easy to get
-started quickly and scale up to a more sophisticated implementation as your
-compiler matures.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="quickstart">Getting started</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Using a GC with LLVM implies many things, for example:</p>
-
-<ul>
-  <li>Write a runtime library or find an existing one which implements a GC
-      heap.<ol>
-    <li>Implement a memory allocator.</li>
-    <li>Design a binary interface for the stack map, used to identify
-        references within a stack frame on the machine stack.*</li>
-    <li>Implement a stack crawler to discover functions on the call stack.*</li>
-    <li>Implement a registry for global roots.</li>
-    <li>Design a binary interface for type maps, used to identify references
-        within heap objects.</li>
-    <li>Implement a collection routine bringing together all of the above.</li>
-  </ol></li>
-  <li>Emit compatible code from your compiler.<ul>
-    <li>Initialization in the main function.</li>
-    <li>Use the <tt>gc "..."</tt> attribute to enable GC code generation
-        (or <tt>F.setGC("...")</tt>).</li>
-    <li>Use <tt>@llvm.gcroot</tt> to mark stack roots.</li>
-    <li>Use <tt>@llvm.gcread</tt> and/or <tt>@llvm.gcwrite</tt> to
-        manipulate GC references, if necessary.</li>
-    <li>Allocate memory using the GC allocation routine provided by the
-        runtime library.</li>
-    <li>Generate type maps according to your runtime's binary interface.</li>
-  </ul></li>
-  <li>Write a compiler plugin to interface LLVM with the runtime library.*<ul>
-    <li>Lower <tt>@llvm.gcread</tt> and <tt>@llvm.gcwrite</tt> to appropriate
-        code sequences.*</li>
-    <li>Compile LLVM's stack map to the binary form expected by the
-        runtime.</li>
-  </ul></li>
-  <li>Load the plugin into the compiler. Use <tt>llc -load</tt> or link the
-      plugin statically with your language's compiler.*</li>
-  <li>Link program executables with the runtime.</li>
-</ul>
-
-<p>To help with several of these tasks (those indicated with a *), LLVM
-includes a highly portable, built-in ShadowStack code generator. It is compiled
-into <tt>llc</tt> and works even with the interpreter and C backends.</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="quickstart-compiler">In your compiler</a>
-</h3>
-
-<div>
-
-<p>To turn the shadow stack on for your functions, first call:</p>
-
-<div class="doc_code"><pre
->F.setGC("shadow-stack");</pre></div>
-
-<p>for each function your compiler emits. Since the shadow stack is built into
-LLVM, you do not need to load a plugin.</p>
-
-<p>Your compiler must also use <tt>@llvm.gcroot</tt> as documented.
-Don't forget to create a root for each intermediate value that is generated
-when evaluating an expression. In <tt>h(f(), g())</tt>, the result of
-<tt>f()</tt> could easily be collected if evaluating <tt>g()</tt> triggers a
-collection.</p>
-
-<p>There's no need to use <tt>@llvm.gcread</tt> and <tt>@llvm.gcwrite</tt> over
-plain <tt>load</tt> and <tt>store</tt> for now. You will need them when
-switching to a more advanced GC.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="quickstart-runtime">In your runtime</a>
-</h3>
-
-<div>
-
-<p>The shadow stack doesn't imply a memory allocation algorithm. A semispace
-collector or building atop <tt>malloc</tt> are great places to start, and can
-be implemented with very little code.</p>
-
-<p>When it comes time to collect, however, your runtime needs to traverse the
-stack roots, and for this it needs to integrate with the shadow stack. Luckily,
-doing so is very simple. (This code is heavily commented to help you
-understand the data structure, but there are only 20 lines of meaningful
-code.)</p>
-
-<pre class="doc_code">
-/// @brief The map for a single function's stack frame. One of these is
-///        compiled as constant data into the executable for each function.
-/// 
-/// Storage of metadata values is elided if the %metadata parameter to
-/// @llvm.gcroot is null.
-struct FrameMap {
-  int32_t NumRoots;    //&lt; Number of roots in stack frame.
-  int32_t NumMeta;     //&lt; Number of metadata entries. May be &lt; NumRoots.
-  const void *Meta[0]; //&lt; Metadata for each root.
-};
-
-/// @brief A link in the dynamic shadow stack. One of these is embedded in the
-///        stack frame of each function on the call stack.
-struct StackEntry {
-  StackEntry *Next;    //&lt; Link to next stack entry (the caller's).
-  const FrameMap *Map; //&lt; Pointer to constant FrameMap.
-  void *Roots[0];      //&lt; Stack roots (in-place array).
-};
-
-/// @brief The head of the singly-linked list of StackEntries. Functions push
-///        and pop onto this in their prologue and epilogue.
-/// 
-/// Since there is only a global list, this technique is not threadsafe.
-StackEntry *llvm_gc_root_chain;
-
-/// @brief Calls Visitor(root, meta) for each GC root on the stack.
-///        root and meta are exactly the values passed to
-///        <tt>@llvm.gcroot</tt>.
-/// 
-/// Visitor could be a function to recursively mark live objects. Or it
-/// might copy them to another heap or generation.
-/// 
-/// @param Visitor A function to invoke for every GC root on the stack.
-void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
-  for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) {
-    unsigned i = 0;
-    
-    // For roots [0, NumMeta), the metadata pointer is in the FrameMap.
-    for (unsigned e = R->Map->NumMeta; i != e; ++i)
-      Visitor(&amp;R->Roots[i], R->Map->Meta[i]);
-    
-    // For roots [NumMeta, NumRoots), the metadata pointer is null.
-    for (unsigned e = R->Map->NumRoots; i != e; ++i)
-      Visitor(&amp;R->Roots[i], NULL);
-  }
-}</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="shadow-stack">About the shadow stack</a>
-</h3>
-
-<div>
-
-<p>Unlike many GC algorithms which rely on a cooperative code generator to
-compile stack maps, this algorithm carefully maintains a linked list of stack
-roots [<a href="#henderson02">Henderson2002</a>]. This so-called "shadow stack"
-mirrors the machine stack. Maintaining this data structure is slower than using
-a stack map compiled into the executable as constant data, but has a significant
-portability advantage because it requires no special support from the target
-code generator, and does not require tricky platform-specific code to crawl
-the machine stack.</p>
-
-<p>The tradeoff for this simplicity and portability is:</p>
-
-<ul>
-  <li>High overhead per function call.</li>
-  <li>Not thread-safe.</li>
-</ul>
-
-<p>Still, it's an easy way to get started. After your compiler and runtime are
-up and running, writing a <a href="#plugin">plugin</a> will allow you to take
-advantage of <a href="#collector-algos">more advanced GC features</a> of LLVM
-in order to improve performance.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="core">IR features</a><a name="intrinsics"></a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This section describes the garbage collection facilities provided by the
-<a href="LangRef.html">LLVM intermediate representation</a>. The exact behavior
-of these IR features is specified by the binary interface implemented by a
-<a href="#plugin">code generation plugin</a>, not by this document.</p>
-
-<p>These facilities are limited to those strictly necessary; they are not
-intended to be a complete interface to any garbage collector. A program will
-need to interface with the GC library using the facilities provided by that
-program.</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="gcattr">Specifying GC code generation: <tt>gc "..."</tt></a>
-</h3>
-
-<div>
-
-<div class="doc_code"><tt>
-  define <i>ty</i> @<i>name</i>(...) <span style="text-decoration: underline">gc "<i>name</i>"</span> { ...
-</tt></div>
-
-<p>The <tt>gc</tt> function attribute is used to specify the desired GC style
-to the compiler. Its programmatic equivalent is the <tt>setGC</tt> method of
-<tt>Function</tt>.</p>
-
-<p>Setting <tt>gc "<i>name</i>"</tt> on a function triggers a search for a
-matching code generation plugin "<i>name</i>"; it is that plugin which defines
-the exact nature of the code generated to support GC. If none is found, the
-compiler will raise an error.</p>
-
-<p>Specifying the GC style on a per-function basis allows LLVM to link together
-programs that use different garbage collection algorithms (or none at all).</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="gcroot">Identifying GC roots on the stack: <tt>llvm.gcroot</tt></a>
-</h3>
-
-<div>
-
-<div class="doc_code"><tt>
-  void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
-</tt></div>
-
-<p>The <tt>llvm.gcroot</tt> intrinsic is used to inform LLVM that a stack
-variable references an object on the heap and is to be tracked for garbage
-collection. The exact impact on generated code is specified by a <a
-href="#plugin">compiler plugin</a>. All calls to <tt>llvm.gcroot</tt> <b>must</b> reside
- inside the first basic block.</p>
-
-<p>A compiler which uses mem2reg to raise imperative code using <tt>alloca</tt>
-into SSA form need only add a call to <tt>@llvm.gcroot</tt> for those variables
-which a pointers into the GC heap.</p>
-
-<p>It is also important to mark intermediate values with <tt>llvm.gcroot</tt>.
-For example, consider <tt>h(f(), g())</tt>. Beware leaking the result of
-<tt>f()</tt> in the case that <tt>g()</tt> triggers a collection. Note, that
-stack variables must be initialized and marked with <tt>llvm.gcroot</tt> in
-function's prologue.</p>
-
-<p>The first argument <b>must</b> be a value referring to an alloca instruction
-or a bitcast of an alloca. The second contains a pointer to metadata that
-should be associated with the pointer, and <b>must</b> be a constant or global
-value address. If your target collector uses tags, use a null pointer for
-metadata.</p>
-
-<p>The <tt>%metadata</tt> argument can be used to avoid requiring heap objects
-to have 'isa' pointers or tag bits. [<a href="#appel89">Appel89</a>, <a
-href="#goldberg91">Goldberg91</a>, <a href="#tolmach94">Tolmach94</a>] If
-specified, its value will be tracked along with the location of the pointer in
-the stack frame.</p>
-
-<p>Consider the following fragment of Java code:</p>
-
-<pre class="doc_code">
-       {
-         Object X;   // A null-initialized reference to an object
-         ...
-       }
-</pre>
-
-<p>This block (which may be located in the middle of a function or in a loop
-nest), could be compiled to this LLVM code:</p>
-
-<pre class="doc_code">
-Entry:
-   ;; In the entry block for the function, allocate the
-   ;; stack space for X, which is an LLVM pointer.
-   %X = alloca %Object*
-   
-   ;; Tell LLVM that the stack space is a stack root.
-   ;; Java has type-tags on objects, so we pass null as metadata.
-   %tmp = bitcast %Object** %X to i8**
-   call void @llvm.gcroot(i8** %tmp, i8* null)
-   ...
-
-   ;; "CodeBlock" is the block corresponding to the start
-   ;;  of the scope above.
-CodeBlock:
-   ;; Java null-initializes pointers.
-   store %Object* null, %Object** %X
-
-   ...
-
-   ;; As the pointer goes out of scope, store a null value into
-   ;; it, to indicate that the value is no longer live.
-   store %Object* null, %Object** %X
-   ...
-</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="barriers">Reading and writing references in the heap</a>
-</h3>
-
-<div>
-
-<p>Some collectors need to be informed when the mutator (the program that needs
-garbage collection) either reads a pointer from or writes a pointer to a field
-of a heap object. The code fragments inserted at these points are called
-<em>read barriers</em> and <em>write barriers</em>, respectively. The amount of
-code that needs to be executed is usually quite small and not on the critical
-path of any computation, so the overall performance impact of the barrier is
-tolerable.</p>
-
-<p>Barriers often require access to the <em>object pointer</em> rather than the
-<em>derived pointer</em> (which is a pointer to the field within the
-object). Accordingly, these intrinsics take both pointers as separate arguments
-for completeness. In this snippet, <tt>%object</tt> is the object pointer, and 
-<tt>%derived</tt> is the derived pointer:</p>
-
-<blockquote><pre>
-    ;; An array type.
-    %class.Array = type { %class.Object, i32, [0 x %class.Object*] }
-    ...
-
-    ;; Load the object pointer from a gcroot.
-    %object = load %class.Array** %object_addr
-
-    ;; Compute the derived pointer.
-    %derived = getelementptr %object, i32 0, i32 2, i32 %n</pre></blockquote>
-
-<p>LLVM does not enforce this relationship between the object and derived
-pointer (although a <a href="#plugin">plugin</a> might). However, it would be
-an unusual collector that violated it.</p>
-
-<p>The use of these intrinsics is naturally optional if the target GC does
-require the corresponding barrier. Such a GC plugin will replace the intrinsic
-calls with the corresponding <tt>load</tt> or <tt>store</tt> instruction if they
-are used.</p>
-
-<!-- ======================================================================= -->
-<h4>
-  <a name="gcwrite">Write barrier: <tt>llvm.gcwrite</tt></a>
-</h4>
-
-<div>
-
-<div class="doc_code"><tt>
-void @llvm.gcwrite(i8* %value, i8* %object, i8** %derived)
-</tt></div>
-
-<p>For write barriers, LLVM provides the <tt>llvm.gcwrite</tt> intrinsic
-function. It has exactly the same semantics as a non-volatile <tt>store</tt> to
-the derived pointer (the third argument). The exact code generated is specified
-by a <a href="#plugin">compiler plugin</a>.</p>
-
-<p>Many important algorithms require write barriers, including generational
-and concurrent collectors. Additionally, write barriers could be used to
-implement reference counting.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4>
-  <a name="gcread">Read barrier: <tt>llvm.gcread</tt></a>
-</h4>
-
-<div>
-
-<div class="doc_code"><tt>
-i8* @llvm.gcread(i8* %object, i8** %derived)<br>
-</tt></div>
-
-<p>For read barriers, LLVM provides the <tt>llvm.gcread</tt> intrinsic function.
-It has exactly the same semantics as a non-volatile <tt>load</tt> from the
-derived pointer (the second argument). The exact code generated is specified by
-a <a href="#plugin">compiler plugin</a>.</p>
-
-<p>Read barriers are needed by fewer algorithms than write barriers, and may
-have a greater performance impact since pointer reads are more frequent than
-writes.</p>
-
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="plugin">Implementing a collector plugin</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>User code specifies which GC code generation to use with the <tt>gc</tt>
-function attribute or, equivalently, with the <tt>setGC</tt> method of
-<tt>Function</tt>.</p>
-
-<p>To implement a GC plugin, it is necessary to subclass
-<tt>llvm::GCStrategy</tt>, which can be accomplished in a few lines of
-boilerplate code. LLVM's infrastructure provides access to several important
-algorithms. For an uncontroversial collector, all that remains may be to
-compile LLVM's computed stack map to assembly code (using the binary
-representation expected by the runtime library). This can be accomplished in
-about 100 lines of code.</p>
-
-<p>This is not the appropriate place to implement a garbage collected heap or a
-garbage collector itself. That code should exist in the language's runtime
-library. The compiler plugin is responsible for generating code which
-conforms to the binary interface defined by library, most essentially the
-<a href="#stack-map">stack map</a>.</p>
-
-<p>To subclass <tt>llvm::GCStrategy</tt> and register it with the compiler:</p>
-
-<blockquote><pre>// lib/MyGC/MyGC.cpp - Example LLVM GC plugin
-
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/Support/Compiler.h"
-
-using namespace llvm;
-
-namespace {
-  class LLVM_LIBRARY_VISIBILITY MyGC : public GCStrategy {
-  public:
-    MyGC() {}
-  };
-  
-  GCRegistry::Add&lt;MyGC&gt;
-  X("mygc", "My bespoke garbage collector.");
-}</pre></blockquote>
-
-<p>This boilerplate collector does nothing. More specifically:</p>
-
-<ul>
-  <li><tt>llvm.gcread</tt> calls are replaced with the corresponding
-      <tt>load</tt> instruction.</li>
-  <li><tt>llvm.gcwrite</tt> calls are replaced with the corresponding
-      <tt>store</tt> instruction.</li>
-  <li>No safe points are added to the code.</li>
-  <li>The stack map is not compiled into the executable.</li>
-</ul>
-
-<p>Using the LLVM makefiles (like the <a
-href="http://llvm.org/viewvc/llvm-project/llvm/trunk/projects/sample/">sample
-project</a>), this code can be compiled as a plugin using a simple
-makefile:</p>
-
-<blockquote><pre
-># lib/MyGC/Makefile
-
-LEVEL := ../..
-LIBRARYNAME = <var>MyGC</var>
-LOADABLE_MODULE = 1
-
-include $(LEVEL)/Makefile.common</pre></blockquote>
-
-<p>Once the plugin is compiled, code using it may be compiled using <tt>llc
--load=<var>MyGC.so</var></tt> (though <var>MyGC.so</var> may have some other
-platform-specific extension):</p>
-
-<blockquote><pre
->$ cat sample.ll
-define void @f() gc "mygc" {
-entry:
-        ret void
-}
-$ llvm-as &lt; sample.ll | llc -load=MyGC.so</pre></blockquote>
-
-<p>It is also possible to statically link the collector plugin into tools, such
-as a language-specific compiler front-end.</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="collector-algos">Overview of available features</a>
-</h3>
-
-<div>
-
-<p><tt>GCStrategy</tt> provides a range of features through which a plugin
-may do useful work. Some of these are callbacks, some are algorithms that can
-be enabled, disabled, or customized. This matrix summarizes the supported (and
-planned) features and correlates them with the collection techniques which
-typically require them.</p>
-
-<table>
-  <tr>
-    <th>Algorithm</th>
-    <th>Done</th>
-    <th>shadow stack</th>
-    <th>refcount</th>
-    <th>mark-sweep</th>
-    <th>copying</th>
-    <th>incremental</th>
-    <th>threaded</th>
-    <th>concurrent</th>
-  </tr>
-  <tr>
-    <th class="rowhead"><a href="#stack-map">stack map</a></th>
-    <td>&#10004;</td>
-    <td></td>
-    <td></td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-  </tr>
-  <tr>
-    <th class="rowhead"><a href="#init-roots">initialize roots</a></th>
-    <td>&#10004;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-  </tr>
-  <tr class="doc_warning">
-    <th class="rowhead">derived pointers</th>
-    <td>NO</td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td>&#10008;*</td>
-    <td>&#10008;*</td>
-  </tr>
-  <tr>
-    <th class="rowhead"><em><a href="#custom">custom lowering</a></em></th>
-    <td>&#10004;</td>
-    <th></th>
-    <th></th>
-    <th></th>
-    <th></th>
-    <th></th>
-    <th></th>
-    <th></th>
-  </tr>
-  <tr>
-    <th class="rowhead indent">gcroot</th>
-    <td>&#10004;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-  </tr>
-  <tr>
-    <th class="rowhead indent">gcwrite</th>
-    <td>&#10004;</td>
-    <td></td>
-    <td>&#10008;</td>
-    <td></td>
-    <td></td>
-    <td>&#10008;</td>
-    <td></td>
-    <td>&#10008;</td>
-  </tr>
-  <tr>
-    <th class="rowhead indent">gcread</th>
-    <td>&#10004;</td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td>&#10008;</td>
-  </tr>
-  <tr>
-    <th class="rowhead"><em><a href="#safe-points">safe points</a></em></th>
-    <td></td>
-    <th></th>
-    <th></th>
-    <th></th>
-    <th></th>
-    <th></th>
-    <th></th>
-    <th></th>
-  </tr>
-  <tr>
-    <th class="rowhead indent">in calls</th>
-    <td>&#10004;</td>
-    <td></td>
-    <td></td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-  </tr>
-  <tr>
-    <th class="rowhead indent">before calls</th>
-    <td>&#10004;</td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-  </tr>
-  <tr class="doc_warning">
-    <th class="rowhead indent">for loops</th>
-    <td>NO</td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-  </tr>
-  <tr>
-    <th class="rowhead indent">before escape</th>
-    <td>&#10004;</td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-  </tr>
-  <tr class="doc_warning">
-    <th class="rowhead">emit code at safe points</th>
-    <td>NO</td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td></td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-  </tr>
-  <tr>
-    <th class="rowhead"><em>output</em></th>
-    <td></td>
-    <th></th>
-    <th></th>
-    <th></th>
-    <th></th>
-    <th></th>
-    <th></th>
-    <th></th>
-  </tr>
-  <tr>
-    <th class="rowhead indent"><a href="#assembly">assembly</a></th>
-    <td>&#10004;</td>
-    <td></td>
-    <td></td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-    <td>&#10008;</td>
-  </tr>
-  <tr class="doc_warning">
-    <th class="rowhead indent">JIT</th>
-    <td>NO</td>
-    <td></td>
-    <td></td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-  </tr>
-  <tr class="doc_warning">
-    <th class="rowhead indent">obj</th>
-    <td>NO</td>
-    <td></td>
-    <td></td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-  </tr>
-  <tr class="doc_warning">
-    <th class="rowhead">live analysis</th>
-    <td>NO</td>
-    <td></td>
-    <td></td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-  </tr>
-  <tr class="doc_warning">
-    <th class="rowhead">register map</th>
-    <td>NO</td>
-    <td></td>
-    <td></td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-    <td class="optl">&#10008;</td>
-  </tr>
-  <tr>
-    <td colspan="10">
-      <div><span class="doc_warning">*</span> Derived pointers only pose a
-           hazard to copying collectors.</div>
-      <div><span class="optl">&#10008;</span> in gray denotes a feature which
-           could be utilized if available.</div>
-    </td>
-  </tr>
-</table>
-
-<p>To be clear, the collection techniques above are defined as:</p>
-
-<dl>
-  <dt>Shadow Stack</dt>
-  <dd>The mutator carefully maintains a linked list of stack roots.</dd>
-  <dt>Reference Counting</dt>
-  <dd>The mutator maintains a reference count for each object and frees an
-      object when its count falls to zero.</dd>
-  <dt>Mark-Sweep</dt>
-  <dd>When the heap is exhausted, the collector marks reachable objects starting
-      from the roots, then deallocates unreachable objects in a sweep
-      phase.</dd>
-  <dt>Copying</dt>
-  <dd>As reachability analysis proceeds, the collector copies objects from one
-      heap area to another, compacting them in the process. Copying collectors
-      enable highly efficient "bump pointer" allocation and can improve locality
-      of reference.</dd>
-  <dt>Incremental</dt>
-  <dd>(Including generational collectors.) Incremental collectors generally have
-      all the properties of a copying collector (regardless of whether the
-      mature heap is compacting), but bring the added complexity of requiring
-      write barriers.</dd>
-  <dt>Threaded</dt>
-  <dd>Denotes a multithreaded mutator; the collector must still stop the mutator
-      ("stop the world") before beginning reachability analysis. Stopping a
-      multithreaded mutator is a complicated problem. It generally requires
-      highly platform specific code in the runtime, and the production of
-      carefully designed machine code at safe points.</dd>
-  <dt>Concurrent</dt>
-  <dd>In this technique, the mutator and the collector run concurrently, with
-      the goal of eliminating pause times. In a <em>cooperative</em> collector,
-      the mutator further aids with collection should a pause occur, allowing
-      collection to take advantage of multiprocessor hosts. The "stop the world"
-      problem of threaded collectors is generally still present to a limited
-      extent. Sophisticated marking algorithms are necessary. Read barriers may
-      be necessary.</dd>
-</dl>
-
-<p>As the matrix indicates, LLVM's garbage collection infrastructure is already
-suitable for a wide variety of collectors, but does not currently extend to
-multithreaded programs. This will be added in the future as there is
-interest.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="stack-map">Computing stack maps</a>
-</h3>
-
-<div>
-
-<p>LLVM automatically computes a stack map. One of the most important features
-of a <tt>GCStrategy</tt> is to compile this information into the executable in
-the binary representation expected by the runtime library.</p>
-
-<p>The stack map consists of the location and identity of each GC root in the
-each function in the module. For each root:</p>
-
-<ul>
-  <li><tt>RootNum</tt>: The index of the root.</li>
-  <li><tt>StackOffset</tt>: The offset of the object relative to the frame
-      pointer.</li>
-  <li><tt>RootMetadata</tt>: The value passed as the <tt>%metadata</tt>
-      parameter to the <a href="#gcroot"><tt>@llvm.gcroot</tt></a> intrinsic.</li>
-</ul>
-
-<p>Also, for the function as a whole:</p>
-
-<ul>
-  <li><tt>getFrameSize()</tt>: The overall size of the function's initial
-      stack frame, not accounting for any dynamic allocation.</li>
-  <li><tt>roots_size()</tt>: The count of roots in the function.</li>
-</ul>
-
-<p>To access the stack map, use <tt>GCFunctionMetadata::roots_begin()</tt> and
--<tt>end()</tt> from the <tt><a
-href="#assembly">GCMetadataPrinter</a></tt>:</p>
-
-<blockquote><pre
->for (iterator I = begin(), E = end(); I != E; ++I) {
-  GCFunctionInfo *FI = *I;
-  unsigned FrameSize = FI-&gt;getFrameSize();
-  size_t RootCount = FI-&gt;roots_size();
-
-  for (GCFunctionInfo::roots_iterator RI = FI-&gt;roots_begin(),
-                                      RE = FI-&gt;roots_end();
-                                      RI != RE; ++RI) {
-    int RootNum = RI->Num;
-    int RootStackOffset = RI->StackOffset;
-    Constant *RootMetadata = RI->Metadata;
-  }
-}</pre></blockquote>
-
-<p>If the <tt>llvm.gcroot</tt> intrinsic is eliminated before code generation by
-a custom lowering pass, LLVM will compute an empty stack map. This may be useful
-for collector plugins which implement reference counting or a shadow stack.</p>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="init-roots">Initializing roots to null: <tt>InitRoots</tt></a>
-</h3>
-
-<div>
-
-<blockquote><pre
->MyGC::MyGC() {
-  InitRoots = true;
-}</pre></blockquote>
-
-<p>When set, LLVM will automatically initialize each root to <tt>null</tt> upon
-entry to the function. This prevents the GC's sweep phase from visiting
-uninitialized pointers, which will almost certainly cause it to crash. This
-initialization occurs before custom lowering, so the two may be used
-together.</p>
-
-<p>Since LLVM does not yet compute liveness information, there is no means of
-distinguishing an uninitialized stack root from an initialized one. Therefore,
-this feature should be used by all GC plugins. It is enabled by default.</p>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="custom">Custom lowering of intrinsics: <tt>CustomRoots</tt>, 
-    <tt>CustomReadBarriers</tt>, and <tt>CustomWriteBarriers</tt></a>
-</h3>
-
-<div>
-
-<p>For GCs which use barriers or unusual treatment of stack roots, these
-flags allow the collector to perform arbitrary transformations of the LLVM
-IR:</p>
-
-<blockquote><pre
->class MyGC : public GCStrategy {
-public:
-  MyGC() {
-    CustomRoots = true;
-    CustomReadBarriers = true;
-    CustomWriteBarriers = true;
-  }
-  
-  virtual bool initializeCustomLowering(Module &amp;M);
-  virtual bool performCustomLowering(Function &amp;F);
-};</pre></blockquote>
-
-<p>If any of these flags are set, then LLVM suppresses its default lowering for
-the corresponding intrinsics and instead calls
-<tt>performCustomLowering</tt>.</p>
-
-<p>LLVM's default action for each intrinsic is as follows:</p>
-
-<ul>
-  <li><tt>llvm.gcroot</tt>: Leave it alone. The code generator must see it
-                            or the stack map will not be computed.</li>
-  <li><tt>llvm.gcread</tt>: Substitute a <tt>load</tt> instruction.</li>
-  <li><tt>llvm.gcwrite</tt>: Substitute a <tt>store</tt> instruction.</li>
-</ul>
-
-<p>If <tt>CustomReadBarriers</tt> or <tt>CustomWriteBarriers</tt> are specified,
-then <tt>performCustomLowering</tt> <strong>must</strong> eliminate the
-corresponding barriers.</p>
-
-<p><tt>performCustomLowering</tt> must comply with the same restrictions as <a
-href="WritingAnLLVMPass.html#runOnFunction"><tt
->FunctionPass::runOnFunction</tt></a>.
-Likewise, <tt>initializeCustomLowering</tt> has the same semantics as <a
-href="WritingAnLLVMPass.html#doInitialization_mod"><tt
->Pass::doInitialization(Module&amp;)</tt></a>.</p>
-
-<p>The following can be used as a template:</p>
-
-<blockquote><pre
->#include "llvm/Module.h"
-#include "llvm/IntrinsicInst.h"
-
-bool MyGC::initializeCustomLowering(Module &amp;M) {
-  return false;
-}
-
-bool MyGC::performCustomLowering(Function &amp;F) {
-  bool MadeChange = false;
-  
-  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
-    for (BasicBlock::iterator II = BB-&gt;begin(), E = BB-&gt;end(); II != E; )
-      if (IntrinsicInst *CI = dyn_cast&lt;IntrinsicInst&gt;(II++))
-        if (Function *F = CI-&gt;getCalledFunction())
-          switch (F-&gt;getIntrinsicID()) {
-          case Intrinsic::gcwrite:
-            // Handle llvm.gcwrite.
-            CI-&gt;eraseFromParent();
-            MadeChange = true;
-            break;
-          case Intrinsic::gcread:
-            // Handle llvm.gcread.
-            CI-&gt;eraseFromParent();
-            MadeChange = true;
-            break;
-          case Intrinsic::gcroot:
-            // Handle llvm.gcroot.
-            CI-&gt;eraseFromParent();
-            MadeChange = true;
-            break;
-          }
-  
-  return MadeChange;
-}</pre></blockquote>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="safe-points">Generating safe points: <tt>NeededSafePoints</tt></a>
-</h3>
-
-<div>
-
-<p>LLVM can compute four kinds of safe points:</p>
-
-<blockquote><pre
->namespace GC {
-  /// PointKind - The type of a collector-safe point.
-  /// 
-  enum PointKind {
-    Loop,    //&lt; Instr is a loop (backwards branch).
-    Return,  //&lt; Instr is a return instruction.
-    PreCall, //&lt; Instr is a call instruction.
-    PostCall //&lt; Instr is the return address of a call.
-  };
-}</pre></blockquote>
-
-<p>A collector can request any combination of the four by setting the 
-<tt>NeededSafePoints</tt> mask:</p>
-
-<blockquote><pre
->MyGC::MyGC() {
-  NeededSafePoints = 1 &lt;&lt; GC::Loop
-                   | 1 &lt;&lt; GC::Return
-                   | 1 &lt;&lt; GC::PreCall
-                   | 1 &lt;&lt; GC::PostCall;
-}</pre></blockquote>
-
-<p>It can then use the following routines to access safe points.</p>
-
-<blockquote><pre
->for (iterator I = begin(), E = end(); I != E; ++I) {
-  GCFunctionInfo *MD = *I;
-  size_t PointCount = MD-&gt;size();
-
-  for (GCFunctionInfo::iterator PI = MD-&gt;begin(),
-                                PE = MD-&gt;end(); PI != PE; ++PI) {
-    GC::PointKind PointKind = PI-&gt;Kind;
-    unsigned PointNum = PI-&gt;Num;
-  }
-}
-</pre></blockquote>
-
-<p>Almost every collector requires <tt>PostCall</tt> safe points, since these
-correspond to the moments when the function is suspended during a call to a
-subroutine.</p>
-
-<p>Threaded programs generally require <tt>Loop</tt> safe points to guarantee
-that the application will reach a safe point within a bounded amount of time,
-even if it is executing a long-running loop which contains no function
-calls.</p>
-
-<p>Threaded collectors may also require <tt>Return</tt> and <tt>PreCall</tt>
-safe points to implement "stop the world" techniques using self-modifying code,
-where it is important that the program not exit the function without reaching a
-safe point (because only the topmost function has been patched).</p>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="assembly">Emitting assembly code: <tt>GCMetadataPrinter</tt></a>
-</h3>
-
-<div>
-
-<p>LLVM allows a plugin to print arbitrary assembly code before and after the
-rest of a module's assembly code. At the end of the module, the GC can compile
-the LLVM stack map into assembly code. (At the beginning, this information is not
-yet computed.)</p>
-
-<p>Since AsmWriter and CodeGen are separate components of LLVM, a separate
-abstract base class and registry is provided for printing assembly code, the
-<tt>GCMetadaPrinter</tt> and <tt>GCMetadataPrinterRegistry</tt>. The AsmWriter
-will look for such a subclass if the <tt>GCStrategy</tt> sets
-<tt>UsesMetadata</tt>:</p>
-
-<blockquote><pre
->MyGC::MyGC() {
-  UsesMetadata = true;
-}</pre></blockquote>
-
-<p>This separation allows JIT-only clients to be smaller.</p>
-
-<p>Note that LLVM does not currently have analogous APIs to support code
-generation in the JIT, nor using the object writers.</p>
-
-<blockquote><pre
->// lib/MyGC/MyGCPrinter.cpp - Example LLVM GC printer
-
-#include "llvm/CodeGen/GCMetadataPrinter.h"
-#include "llvm/Support/Compiler.h"
-
-using namespace llvm;
-
-namespace {
-  class LLVM_LIBRARY_VISIBILITY MyGCPrinter : public GCMetadataPrinter {
-  public:
-    virtual void beginAssembly(std::ostream &amp;OS, AsmPrinter &amp;AP,
-                               const TargetAsmInfo &amp;TAI);
-  
-    virtual void finishAssembly(std::ostream &amp;OS, AsmPrinter &amp;AP,
-                                const TargetAsmInfo &amp;TAI);
-  };
-  
-  GCMetadataPrinterRegistry::Add&lt;MyGCPrinter&gt;
-  X("mygc", "My bespoke garbage collector.");
-}</pre></blockquote>
-
-<p>The collector should use <tt>AsmPrinter</tt> and <tt>TargetAsmInfo</tt> to
-print portable assembly code to the <tt>std::ostream</tt>. The collector itself
-contains the stack map for the entire module, and may access the
-<tt>GCFunctionInfo</tt> using its own <tt>begin()</tt> and <tt>end()</tt>
-methods. Here's a realistic example:</p>
-
-<blockquote><pre
->#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Function.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetAsmInfo.h"
-
-void MyGCPrinter::beginAssembly(std::ostream &amp;OS, AsmPrinter &amp;AP,
-                                const TargetAsmInfo &amp;TAI) {
-  // Nothing to do.
-}
-
-void MyGCPrinter::finishAssembly(std::ostream &amp;OS, AsmPrinter &amp;AP,
-                                 const TargetAsmInfo &amp;TAI) {
-  // Set up for emitting addresses.
-  const char *AddressDirective;
-  int AddressAlignLog;
-  if (AP.TM.getDataLayout()->getPointerSize() == sizeof(int32_t)) {
-    AddressDirective = TAI.getData32bitsDirective();
-    AddressAlignLog = 2;
-  } else {
-    AddressDirective = TAI.getData64bitsDirective();
-    AddressAlignLog = 3;
-  }
-  
-  // Put this in the data section.
-  AP.SwitchToDataSection(TAI.getDataSection());
-  
-  // For each function...
-  for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
-    GCFunctionInfo &amp;MD = **FI;
-    
-    // Emit this data structure:
-    // 
-    // struct {
-    //   int32_t PointCount;
-    //   struct {
-    //     void *SafePointAddress;
-    //     int32_t LiveCount;
-    //     int32_t LiveOffsets[LiveCount];
-    //   } Points[PointCount];
-    // } __gcmap_&lt;FUNCTIONNAME&gt;;
-    
-    // Align to address width.
-    AP.EmitAlignment(AddressAlignLog);
-    
-    // Emit the symbol by which the stack map entry can be found.
-    std::string Symbol;
-    Symbol += TAI.getGlobalPrefix();
-    Symbol += "__gcmap_";
-    Symbol += MD.getFunction().getName();
-    if (const char *GlobalDirective = TAI.getGlobalDirective())
-      OS &lt;&lt; GlobalDirective &lt;&lt; Symbol &lt;&lt; "\n";
-    OS &lt;&lt; TAI.getGlobalPrefix() &lt;&lt; Symbol &lt;&lt; ":\n";
-    
-    // Emit PointCount.
-    AP.EmitInt32(MD.size());
-    AP.EOL("safe point count");
-    
-    // And each safe point...
-    for (GCFunctionInfo::iterator PI = MD.begin(),
-                                     PE = MD.end(); PI != PE; ++PI) {
-      // Align to address width.
-      AP.EmitAlignment(AddressAlignLog);
-      
-      // Emit the address of the safe point.
-      OS &lt;&lt; AddressDirective
-         &lt;&lt; TAI.getPrivateGlobalPrefix() &lt;&lt; "label" &lt;&lt; PI-&gt;Num;
-      AP.EOL("safe point address");
-      
-      // Emit the stack frame size.
-      AP.EmitInt32(MD.getFrameSize());
-      AP.EOL("stack frame size");
-      
-      // Emit the number of live roots in the function.
-      AP.EmitInt32(MD.live_size(PI));
-      AP.EOL("live root count");
-      
-      // And for each live root...
-      for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
-                                            LE = MD.live_end(PI);
-                                            LI != LE; ++LI) {
-        // Print its offset within the stack frame.
-        AP.EmitInt32(LI-&gt;StackOffset);
-        AP.EOL("stack offset");
-      }
-    }
-  }
-}
-</pre></blockquote>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="references">References</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p><a name="appel89">[Appel89]</a> Runtime Tags Aren't Necessary. Andrew
-W. Appel. Lisp and Symbolic Computation 19(7):703-705, July 1989.</p>
-
-<p><a name="goldberg91">[Goldberg91]</a> Tag-free garbage collection for
-strongly typed programming languages. Benjamin Goldberg. ACM SIGPLAN
-PLDI'91.</p>
-
-<p><a name="tolmach94">[Tolmach94]</a> Tag-free garbage collection using
-explicit type parameters. Andrew Tolmach. Proceedings of the 1994 ACM
-conference on LISP and functional programming.</p>
-
-<p><a name="henderson02">[Henderson2002]</a> <a
-href="http://citeseer.ist.psu.edu/henderson02accurate.html">
-Accurate Garbage Collection in an Uncooperative Environment</a>.
-Fergus Henderson. International Symposium on Memory Management 2002.</p>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-
-</body>
-</html>
diff --git a/docs/GarbageCollection.rst b/docs/GarbageCollection.rst
new file mode 100644
index 0000000000..b0b2718409
--- /dev/null
+++ b/docs/GarbageCollection.rst
@@ -0,0 +1,1051 @@
+=====================================
+Accurate Garbage Collection with LLVM
+=====================================
+
+.. contents::
+   :local:
+
+.. sectionauthor:: Chris Lattner <sabre@nondot.org>  and
+                   Gordon Henriksen
+
+Introduction
+============
+
+Garbage collection is a widely used technique that frees the programmer from
+having to know the lifetimes of heap objects, making software easier to produce
+and maintain.  Many programming languages rely on garbage collection for
+automatic memory management.  There are two primary forms of garbage collection:
+conservative and accurate.
+
+Conservative garbage collection often does not require any special support from
+either the language or the compiler: it can handle non-type-safe programming
+languages (such as C/C++) and does not require any special information from the
+compiler.  The `Boehm collector
+<http://www.hpl.hp.com/personal/Hans_Boehm/gc/>`__ is an example of a
+state-of-the-art conservative collector.
+
+Accurate garbage collection requires the ability to identify all pointers in the
+program at run-time (which requires that the source-language be type-safe in
+most cases).  Identifying pointers at run-time requires compiler support to
+locate all places that hold live pointer variables at run-time, including the
+:ref:`processor stack and registers <gcroot>`.
+
+Conservative garbage collection is attractive because it does not require any
+special compiler support, but it does have problems.  In particular, because the
+conservative garbage collector cannot *know* that a particular word in the
+machine is a pointer, it cannot move live objects in the heap (preventing the
+use of compacting and generational GC algorithms) and it can occasionally suffer
+from memory leaks due to integer values that happen to point to objects in the
+program.  In addition, some aggressive compiler transformations can break
+conservative garbage collectors (though these seem rare in practice).
+
+Accurate garbage collectors do not suffer from any of these problems, but they
+can suffer from degraded scalar optimization of the program.  In particular,
+because the runtime must be able to identify and update all pointers active in
+the program, some optimizations are less effective.  In practice, however, the
+locality and performance benefits of using aggressive garbage collection
+techniques dominates any low-level losses.
+
+This document describes the mechanisms and interfaces provided by LLVM to
+support accurate garbage collection.
+
+.. _feature:
+
+Goals and non-goals
+-------------------
+
+LLVM's intermediate representation provides :ref:`garbage collection intrinsics
+<gc_intrinsics>` that offer support for a broad class of collector models.  For
+instance, the intrinsics permit:
+
+* semi-space collectors
+
+* mark-sweep collectors
+
+* generational collectors
+
+* reference counting
+
+* incremental collectors
+
+* concurrent collectors
+
+* cooperative collectors
+
+We hope that the primitive support built into the LLVM IR is sufficient to
+support a broad class of garbage collected languages including Scheme, ML, Java,
+C#, Perl, Python, Lua, Ruby, other scripting languages, and more.
+
+However, LLVM does not itself provide a garbage collector --- this should be
+part of your language's runtime library.  LLVM provides a framework for compile
+time :ref:`code generation plugins <plugin>`.  The role of these plugins is to
+generate code and data structures which conforms to the *binary interface*
+specified by the *runtime library*.  This is similar to the relationship between
+LLVM and DWARF debugging info, for example.  The difference primarily lies in
+the lack of an established standard in the domain of garbage collection --- thus
+the plugins.
+
+The aspects of the binary interface with which LLVM's GC support is
+concerned are:
+
+* Creation of GC-safe points within code where collection is allowed to execute
+  safely.
+
+* Computation of the stack map.  For each safe point in the code, object
+  references within the stack frame must be identified so that the collector may
+  traverse and perhaps update them.
+
+* Write barriers when storing object references to the heap.  These are commonly
+  used to optimize incremental scans in generational collectors.
+
+* Emission of read barriers when loading object references.  These are useful
+  for interoperating with concurrent collectors.
+
+There are additional areas that LLVM does not directly address:
+
+* Registration of global roots with the runtime.
+
+* Registration of stack map entries with the runtime.
+
+* The functions used by the program to allocate memory, trigger a collection,
+  etc.
+
+* Computation or compilation of type maps, or registration of them with the
+  runtime.  These are used to crawl the heap for object references.
+
+In general, LLVM's support for GC does not include features which can be
+adequately addressed with other features of the IR and does not specify a
+particular binary interface.  On the plus side, this means that you should be
+able to integrate LLVM with an existing runtime.  On the other hand, it leaves a
+lot of work for the developer of a novel language.  However, it's easy to get
+started quickly and scale up to a more sophisticated implementation as your
+compiler matures.
+
+.. _quickstart:
+
+Getting started
+===============
+
+Using a GC with LLVM implies many things, for example:
+
+* Write a runtime library or find an existing one which implements a GC heap.
+
+  #. Implement a memory allocator.
+
+  #. Design a binary interface for the stack map, used to identify references
+     within a stack frame on the machine stack.\*
+
+  #. Implement a stack crawler to discover functions on the call stack.\*
+
+  #. Implement a registry for global roots.
+
+  #. Design a binary interface for type maps, used to identify references
+     within heap objects.
+
+  #. Implement a collection routine bringing together all of the above.
+
+* Emit compatible code from your compiler.
+
+  * Initialization in the main function.
+
+  * Use the ``gc "..."`` attribute to enable GC code generation (or
+    ``F.setGC("...")``).
+
+  * Use ``@llvm.gcroot`` to mark stack roots.
+
+  * Use ``@llvm.gcread`` and/or ``@llvm.gcwrite`` to manipulate GC references,
+    if necessary.
+
+  * Allocate memory using the GC allocation routine provided by the runtime
+    library.
+
+  * Generate type maps according to your runtime's binary interface.
+
+* Write a compiler plugin to interface LLVM with the runtime library.\*
+
+  * Lower ``@llvm.gcread`` and ``@llvm.gcwrite`` to appropriate code
+    sequences.\*
+
+  * Compile LLVM's stack map to the binary form expected by the runtime.
+
+* Load the plugin into the compiler.  Use ``llc -load`` or link the plugin
+  statically with your language's compiler.\*
+
+* Link program executables with the runtime.
+
+To help with several of these tasks (those indicated with a \*), LLVM includes a
+highly portable, built-in ShadowStack code generator.  It is compiled into
+``llc`` and works even with the interpreter and C backends.
+
+.. _quickstart-compiler:
+
+In your compiler
+----------------
+
+To turn the shadow stack on for your functions, first call:
+
+.. code-block:: c++
+
+  F.setGC("shadow-stack");
+
+for each function your compiler emits. Since the shadow stack is built into
+LLVM, you do not need to load a plugin.
+
+Your compiler must also use ``@llvm.gcroot`` as documented.  Don't forget to
+create a root for each intermediate value that is generated when evaluating an
+expression.  In ``h(f(), g())``, the result of ``f()`` could easily be collected
+if evaluating ``g()`` triggers a collection.
+
+There's no need to use ``@llvm.gcread`` and ``@llvm.gcwrite`` over plain
+``load`` and ``store`` for now.  You will need them when switching to a more
+advanced GC.
+
+.. _quickstart-runtime:
+
+In your runtime
+---------------
+
+The shadow stack doesn't imply a memory allocation algorithm.  A semispace
+collector or building atop ``malloc`` are great places to start, and can be
+implemented with very little code.
+
+When it comes time to collect, however, your runtime needs to traverse the stack
+roots, and for this it needs to integrate with the shadow stack.  Luckily, doing
+so is very simple. (This code is heavily commented to help you understand the
+data structure, but there are only 20 lines of meaningful code.)
+
+.. code-block:: c++
+
+  /// @brief The map for a single function's stack frame.  One of these is
+  ///        compiled as constant data into the executable for each function.
+  ///
+  /// Storage of metadata values is elided if the %metadata parameter to
+  /// @llvm.gcroot is null.
+  struct FrameMap {
+    int32_t NumRoots;    //< Number of roots in stack frame.
+    int32_t NumMeta;     //< Number of metadata entries.  May be < NumRoots.
+    const void *Meta[0]; //< Metadata for each root.
+  };
+
+  /// @brief A link in the dynamic shadow stack.  One of these is embedded in
+  ///        the stack frame of each function on the call stack.
+  struct StackEntry {
+    StackEntry *Next;    //< Link to next stack entry (the caller's).
+    const FrameMap *Map; //< Pointer to constant FrameMap.
+    void *Roots[0];      //< Stack roots (in-place array).
+  };
+
+  /// @brief The head of the singly-linked list of StackEntries.  Functions push
+  ///        and pop onto this in their prologue and epilogue.
+  ///
+  /// Since there is only a global list, this technique is not threadsafe.
+  StackEntry *llvm_gc_root_chain;
+
+  /// @brief Calls Visitor(root, meta) for each GC root on the stack.
+  ///        root and meta are exactly the values passed to
+  ///        @llvm.gcroot.
+  ///
+  /// Visitor could be a function to recursively mark live objects.  Or it
+  /// might copy them to another heap or generation.
+  ///
+  /// @param Visitor A function to invoke for every GC root on the stack.
+  void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
+    for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) {
+      unsigned i = 0;
+
+      // For roots [0, NumMeta), the metadata pointer is in the FrameMap.
+      for (unsigned e = R->Map->NumMeta; i != e; ++i)
+        Visitor(&R->Roots[i], R->Map->Meta[i]);
+
+      // For roots [NumMeta, NumRoots), the metadata pointer is null.
+      for (unsigned e = R->Map->NumRoots; i != e; ++i)
+        Visitor(&R->Roots[i], NULL);
+    }
+  }
+
+.. _shadow-stack:
+
+About the shadow stack
+----------------------
+
+Unlike many GC algorithms which rely on a cooperative code generator to compile
+stack maps, this algorithm carefully maintains a linked list of stack roots
+[:ref:`Henderson2002 <henderson02>`].  This so-called "shadow stack" mirrors the
+machine stack.  Maintaining this data structure is slower than using a stack map
+compiled into the executable as constant data, but has a significant portability
+advantage because it requires no special support from the target code generator,
+and does not require tricky platform-specific code to crawl the machine stack.
+
+The tradeoff for this simplicity and portability is:
+
+* High overhead per function call.
+
+* Not thread-safe.
+
+Still, it's an easy way to get started.  After your compiler and runtime are up
+and running, writing a plugin_ will allow you to take advantage of :ref:`more
+advanced GC features <collector-algos>` of LLVM in order to improve performance.
+
+.. _gc_intrinsics:
+
+IR features
+===========
+
+This section describes the garbage collection facilities provided by the
+:doc:`LLVM intermediate representation <LangRef>`.  The exact behavior of these
+IR features is specified by the binary interface implemented by a :ref:`code
+generation plugin <plugin>`, not by this document.
+
+These facilities are limited to those strictly necessary; they are not intended
+to be a complete interface to any garbage collector.  A program will need to
+interface with the GC library using the facilities provided by that program.
+
+.. _gcattr:
+
+Specifying GC code generation: ``gc "..."``
+-------------------------------------------
+
+.. code-block:: llvm
+
+  define ty @name(...) gc "name" { ...
+
+The ``gc`` function attribute is used to specify the desired GC style to the
+compiler.  Its programmatic equivalent is the ``setGC`` method of ``Function``.
+
+Setting ``gc "name"`` on a function triggers a search for a matching code
+generation plugin "*name*"; it is that plugin which defines the exact nature of
+the code generated to support GC.  If none is found, the compiler will raise an
+error.
+
+Specifying the GC style on a per-function basis allows LLVM to link together
+programs that use different garbage collection algorithms (or none at all).
+
+.. _gcroot:
+
+Identifying GC roots on the stack: ``llvm.gcroot``
+--------------------------------------------------
+
+.. code-block:: llvm
+
+  void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
+
+The ``llvm.gcroot`` intrinsic is used to inform LLVM that a stack variable
+references an object on the heap and is to be tracked for garbage collection.
+The exact impact on generated code is specified by a :ref:`compiler plugin
+<plugin>`.  All calls to ``llvm.gcroot`` **must** reside inside the first basic
+block.
+
+A compiler which uses mem2reg to raise imperative code using ``alloca`` into SSA
+form need only add a call to ``@llvm.gcroot`` for those variables which a
+pointers into the GC heap.
+
+It is also important to mark intermediate values with ``llvm.gcroot``.  For
+example, consider ``h(f(), g())``.  Beware leaking the result of ``f()`` in the
+case that ``g()`` triggers a collection.  Note, that stack variables must be
+initialized and marked with ``llvm.gcroot`` in function's prologue.
+
+The first argument **must** be a value referring to an alloca instruction or a
+bitcast of an alloca.  The second contains a pointer to metadata that should be
+associated with the pointer, and **must** be a constant or global value
+address.  If your target collector uses tags, use a null pointer for metadata.
+
+The ``%metadata`` argument can be used to avoid requiring heap objects to have
+'isa' pointers or tag bits. [Appel89_, Goldberg91_, Tolmach94_] If specified,
+its value will be tracked along with the location of the pointer in the stack
+frame.
+
+Consider the following fragment of Java code:
+
+.. code-block:: java
+
+   {
+     Object X;   // A null-initialized reference to an object
+     ...
+   }
+
+This block (which may be located in the middle of a function or in a loop nest),
+could be compiled to this LLVM code:
+
+.. code-block:: llvm
+
+  Entry:
+     ;; In the entry block for the function, allocate the
+     ;; stack space for X, which is an LLVM pointer.
+     %X = alloca %Object*
+
+     ;; Tell LLVM that the stack space is a stack root.
+     ;; Java has type-tags on objects, so we pass null as metadata.
+     %tmp = bitcast %Object** %X to i8**
+     call void @llvm.gcroot(i8** %tmp, i8* null)
+     ...
+
+     ;; "CodeBlock" is the block corresponding to the start
+     ;;  of the scope above.
+  CodeBlock:
+     ;; Java null-initializes pointers.
+     store %Object* null, %Object** %X
+
+     ...
+
+     ;; As the pointer goes out of scope, store a null value into
+     ;; it, to indicate that the value is no longer live.
+     store %Object* null, %Object** %X
+     ...
+
+.. _barriers:
+
+Reading and writing references in the heap
+------------------------------------------
+
+Some collectors need to be informed when the mutator (the program that needs
+garbage collection) either reads a pointer from or writes a pointer to a field
+of a heap object.  The code fragments inserted at these points are called *read
+barriers* and *write barriers*, respectively.  The amount of code that needs to
+be executed is usually quite small and not on the critical path of any
+computation, so the overall performance impact of the barrier is tolerable.
+
+Barriers often require access to the *object pointer* rather than the *derived
+pointer* (which is a pointer to the field within the object).  Accordingly,
+these intrinsics take both pointers as separate arguments for completeness.  In
+this snippet, ``%object`` is the object pointer, and ``%derived`` is the derived
+pointer:
+
+.. code-block:: llvm
+
+  ;; An array type.
+  %class.Array = type { %class.Object, i32, [0 x %class.Object*] }
+  ...
+
+  ;; Load the object pointer from a gcroot.
+  %object = load %class.Array** %object_addr
+
+  ;; Compute the derived pointer.
+  %derived = getelementptr %object, i32 0, i32 2, i32 %n
+
+LLVM does not enforce this relationship between the object and derived pointer
+(although a plugin_ might).  However, it would be an unusual collector that
+violated it.
+
+The use of these intrinsics is naturally optional if the target GC does require
+the corresponding barrier.  Such a GC plugin will replace the intrinsic calls
+with the corresponding ``load`` or ``store`` instruction if they are used.
+
+.. _gcwrite:
+
+Write barrier: ``llvm.gcwrite``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+  void @llvm.gcwrite(i8* %value, i8* %object, i8** %derived)
+
+For write barriers, LLVM provides the ``llvm.gcwrite`` intrinsic function.  It
+has exactly the same semantics as a non-volatile ``store`` to the derived
+pointer (the third argument).  The exact code generated is specified by a
+compiler plugin_.
+
+Many important algorithms require write barriers, including generational and
+concurrent collectors.  Additionally, write barriers could be used to implement
+reference counting.
+
+.. _gcread:
+
+Read barrier: ``llvm.gcread``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: llvm
+
+  i8* @llvm.gcread(i8* %object, i8** %derived)
+
+For read barriers, LLVM provides the ``llvm.gcread`` intrinsic function.  It has
+exactly the same semantics as a non-volatile ``load`` from the derived pointer
+(the second argument).  The exact code generated is specified by a compiler
+plugin_.
+
+Read barriers are needed by fewer algorithms than write barriers, and may have a
+greater performance impact since pointer reads are more frequent than writes.
+
+.. _plugin:
+
+Implementing a collector plugin
+===============================
+
+User code specifies which GC code generation to use with the ``gc`` function
+attribute or, equivalently, with the ``setGC`` method of ``Function``.
+
+To implement a GC plugin, it is necessary to subclass ``llvm::GCStrategy``,
+which can be accomplished in a few lines of boilerplate code.  LLVM's
+infrastructure provides access to several important algorithms.  For an
+uncontroversial collector, all that remains may be to compile LLVM's computed
+stack map to assembly code (using the binary representation expected by the
+runtime library).  This can be accomplished in about 100 lines of code.
+
+This is not the appropriate place to implement a garbage collected heap or a
+garbage collector itself.  That code should exist in the language's runtime
+library.  The compiler plugin is responsible for generating code which conforms
+to the binary interface defined by library, most essentially the :ref:`stack map
+<stack-map>`.
+
+To subclass ``llvm::GCStrategy`` and register it with the compiler:
+
+.. code-block:: c++
+
+  // lib/MyGC/MyGC.cpp - Example LLVM GC plugin
+
+  #include "llvm/CodeGen/GCStrategy.h"
+  #include "llvm/CodeGen/GCMetadata.h"
+  #include "llvm/Support/Compiler.h"
+
+  using namespace llvm;
+
+  namespace {
+    class LLVM_LIBRARY_VISIBILITY MyGC : public GCStrategy {
+    public:
+      MyGC() {}
+    };
+
+    GCRegistry::Add<MyGC>
+    X("mygc", "My bespoke garbage collector.");
+  }
+
+This boilerplate collector does nothing.  More specifically:
+
+* ``llvm.gcread`` calls are replaced with the corresponding ``load``
+  instruction.
+
+* ``llvm.gcwrite`` calls are replaced with the corresponding ``store``
+  instruction.
+
+* No safe points are added to the code.
+
+* The stack map is not compiled into the executable.
+
+Using the LLVM makefiles (like the `sample project
+<http://llvm.org/viewvc/llvm-project/llvm/trunk/projects/sample/>`__), this code
+can be compiled as a plugin using a simple makefile:
+
+.. code-block:: make
+
+  # lib/MyGC/Makefile
+
+  LEVEL := ../..
+  LIBRARYNAME = MyGC
+  LOADABLE_MODULE = 1
+
+  include $(LEVEL)/Makefile.common
+
+Once the plugin is compiled, code using it may be compiled using ``llc
+-load=MyGC.so`` (though MyGC.so may have some other platform-specific
+extension):
+
+::
+
+  $ cat sample.ll
+  define void @f() gc "mygc" {
+  entry:
+          ret void
+  }
+  $ llvm-as < sample.ll | llc -load=MyGC.so
+
+It is also possible to statically link the collector plugin into tools, such as
+a language-specific compiler front-end.
+
+.. _collector-algos:
+
+Overview of available features
+------------------------------
+
+``GCStrategy`` provides a range of features through which a plugin may do useful
+work.  Some of these are callbacks, some are algorithms that can be enabled,
+disabled, or customized.  This matrix summarizes the supported (and planned)
+features and correlates them with the collection techniques which typically
+require them.
+
+.. |v| unicode:: 0x2714
+   :trim:
+
+.. |x| unicode:: 0x2718
+   :trim:
+
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| Algorithm  | Done | Shadow | refcount | mark- | copying | incremental | threaded | concurrent |
+|            |      | stack  |          | sweep |         |             |          |            |
++============+======+========+==========+=======+=========+=============+==========+============+
+| stack map  | |v|  |        |          | |x|   | |x|     | |x|         | |x|      | |x|        |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| initialize | |v|  | |x|    | |x|      | |x|   | |x|     | |x|         | |x|      | |x|        |
+| roots      |      |        |          |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| derived    | NO   |        |          |       |         |             | **N**\*  | **N**\*    |
+| pointers   |      |        |          |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| **custom   | |v|  |        |          |       |         |             |          |            |
+| lowering** |      |        |          |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *gcroot*   | |v|  | |x|    | |x|      |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *gcwrite*  | |v|  |        | |x|      |       |         | |x|         |          | |x|        |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *gcread*   | |v|  |        |          |       |         |             |          | |x|        |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| **safe     |      |        |          |       |         |             |          |            |
+| points**   |      |        |          |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *in        | |v|  |        |          | |x|   | |x|     | |x|         | |x|      | |x|        |
+| calls*     |      |        |          |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *before    | |v|  |        |          |       |         |             | |x|      | |x|        |
+| calls*     |      |        |          |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *for       | NO   |        |          |       |         |             | **N**    | **N**      |
+| loops*     |      |        |          |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *before    | |v|  |        |          |       |         |             | |x|      | |x|        |
+| escape*    |      |        |          |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| emit code  | NO   |        |          |       |         |             | **N**    | **N**      |
+| at safe    |      |        |          |       |         |             |          |            |
+| points     |      |        |          |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| **output** |      |        |          |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *assembly* | |v|  |        |          | |x|   | |x|     | |x|         | |x|      | |x|        |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *JIT*      | NO   |        |          | **?** | **?**   | **?**       | **?**    | **?**      |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| *obj*      | NO   |        |          | **?** | **?**   | **?**       | **?**    | **?**      |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| live       | NO   |        |          | **?** | **?**   | **?**       | **?**    | **?**      |
+| analysis   |      |        |          |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| register   | NO   |        |          | **?** | **?**   | **?**       | **?**    | **?**      |
+| map        |      |        |          |       |         |             |          |            |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| \* Derived pointers only pose a hasard to copying collections.                                |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+| **?** denotes a feature which could be utilized if available.                                 |
++------------+------+--------+----------+-------+---------+-------------+----------+------------+
+
+To be clear, the collection techniques above are defined as:
+
+Shadow Stack
+  The mutator carefully maintains a linked list of stack roots.
+
+Reference Counting
+  The mutator maintains a reference count for each object and frees an object
+  when its count falls to zero.
+
+Mark-Sweep
+  When the heap is exhausted, the collector marks reachable objects starting
+  from the roots, then deallocates unreachable objects in a sweep phase.
+
+Copying
+  As reachability analysis proceeds, the collector copies objects from one heap
+  area to another, compacting them in the process.  Copying collectors enable
+  highly efficient "bump pointer" allocation and can improve locality of
+  reference.
+
+Incremental
+  (Including generational collectors.) Incremental collectors generally have all
+  the properties of a copying collector (regardless of whether the mature heap
+  is compacting), but bring the added complexity of requiring write barriers.
+
+Threaded
+  Denotes a multithreaded mutator; the collector must still stop the mutator
+  ("stop the world") before beginning reachability analysis.  Stopping a
+  multithreaded mutator is a complicated problem.  It generally requires highly
+  platform specific code in the runtime, and the production of carefully
+  designed machine code at safe points.
+
+Concurrent
+  In this technique, the mutator and the collector run concurrently, with the
+  goal of eliminating pause times.  In a *cooperative* collector, the mutator
+  further aids with collection should a pause occur, allowing collection to take
+  advantage of multiprocessor hosts.  The "stop the world" problem of threaded
+  collectors is generally still present to a limited extent.  Sophisticated
+  marking algorithms are necessary.  Read barriers may be necessary.
+
+As the matrix indicates, LLVM's garbage collection infrastructure is already
+suitable for a wide variety of collectors, but does not currently extend to
+multithreaded programs.  This will be added in the future as there is
+interest.
+
+.. _stack-map:
+
+Computing stack maps
+--------------------
+
+LLVM automatically computes a stack map.  One of the most important features
+of a ``GCStrategy`` is to compile this information into the executable in
+the binary representation expected by the runtime library.
+
+The stack map consists of the location and identity of each GC root in the
+each function in the module.  For each root:
+
+* ``RootNum``: The index of the root.
+
+* ``StackOffset``: The offset of the object relative to the frame pointer.
+
+* ``RootMetadata``: The value passed as the ``%metadata`` parameter to the
+  ``@llvm.gcroot`` intrinsic.
+
+Also, for the function as a whole:
+
+* ``getFrameSize()``: The overall size of the function's initial stack frame,
+   not accounting for any dynamic allocation.
+
+* ``roots_size()``: The count of roots in the function.
+
+To access the stack map, use ``GCFunctionMetadata::roots_begin()`` and
+-``end()`` from the :ref:`GCMetadataPrinter <assembly>`:
+
+.. code-block:: c++
+
+  for (iterator I = begin(), E = end(); I != E; ++I) {
+    GCFunctionInfo *FI = *I;
+    unsigned FrameSize = FI->getFrameSize();
+    size_t RootCount = FI->roots_size();
+
+    for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
+                                        RE = FI->roots_end();
+                                        RI != RE; ++RI) {
+      int RootNum = RI->Num;
+      int RootStackOffset = RI->StackOffset;
+      Constant *RootMetadata = RI->Metadata;
+    }
+  }
+
+If the ``llvm.gcroot`` intrinsic is eliminated before code generation by a
+custom lowering pass, LLVM will compute an empty stack map.  This may be useful
+for collector plugins which implement reference counting or a shadow stack.
+
+.. _init-roots:
+
+Initializing roots to null: ``InitRoots``
+-----------------------------------------
+
+.. code-block:: c++
+
+  MyGC::MyGC() {
+    InitRoots = true;
+  }
+
+When set, LLVM will automatically initialize each root to ``null`` upon entry to
+the function.  This prevents the GC's sweep phase from visiting uninitialized
+pointers, which will almost certainly cause it to crash.  This initialization
+occurs before custom lowering, so the two may be used together.
+
+Since LLVM does not yet compute liveness information, there is no means of
+distinguishing an uninitialized stack root from an initialized one.  Therefore,
+this feature should be used by all GC plugins.  It is enabled by default.
+
+.. _custom:
+
+Custom lowering of intrinsics: ``CustomRoots``, ``CustomReadBarriers``, and ``CustomWriteBarriers``
+---------------------------------------------------------------------------------------------------
+
+For GCs which use barriers or unusual treatment of stack roots, these flags
+allow the collector to perform arbitrary transformations of the LLVM IR:
+
+.. code-block:: c++
+
+  class MyGC : public GCStrategy {
+  public:
+    MyGC() {
+      CustomRoots = true;
+      CustomReadBarriers = true;
+      CustomWriteBarriers = true;
+    }
+
+    virtual bool initializeCustomLowering(Module &M);
+    virtual bool performCustomLowering(Function &F);
+  };
+
+If any of these flags are set, then LLVM suppresses its default lowering for the
+corresponding intrinsics and instead calls ``performCustomLowering``.
+
+LLVM's default action for each intrinsic is as follows:
+
+* ``llvm.gcroot``: Leave it alone.  The code generator must see it or the stack
+  map will not be computed.
+
+* ``llvm.gcread``: Substitute a ``load`` instruction.
+
+* ``llvm.gcwrite``: Substitute a ``store`` instruction.
+
+If ``CustomReadBarriers`` or ``CustomWriteBarriers`` are specified, then
+``performCustomLowering`` **must** eliminate the corresponding barriers.
+
+``performCustomLowering`` must comply with the same restrictions as
+`FunctionPass::runOnFunction <WritingAnLLVMPass.html#runOnFunction>`__
+Likewise, ``initializeCustomLowering`` has the same semantics as
+`Pass::doInitialization(Module&)
+<WritingAnLLVMPass.html#doInitialization_mod>`__
+
+The following can be used as a template:
+
+.. code-block:: c++
+
+  #include "llvm/Module.h"
+  #include "llvm/IntrinsicInst.h"
+
+  bool MyGC::initializeCustomLowering(Module &M) {
+    return false;
+  }
+
+  bool MyGC::performCustomLowering(Function &F) {
+    bool MadeChange = false;
+
+    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+      for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; )
+        if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+          if (Function *F = CI->getCalledFunction())
+            switch (F->getIntrinsicID()) {
+            case Intrinsic::gcwrite:
+              // Handle llvm.gcwrite.
+              CI->eraseFromParent();
+              MadeChange = true;
+              break;
+            case Intrinsic::gcread:
+              // Handle llvm.gcread.
+              CI->eraseFromParent();
+              MadeChange = true;
+              break;
+            case Intrinsic::gcroot:
+              // Handle llvm.gcroot.
+              CI->eraseFromParent();
+              MadeChange = true;
+              break;
+            }
+
+    return MadeChange;
+  }
+
+.. _safe-points:
+
+Generating safe points: ``NeededSafePoints``
+--------------------------------------------
+
+LLVM can compute four kinds of safe points:
+
+.. code-block:: c++
+
+  namespace GC {
+    /// PointKind - The type of a collector-safe point.
+    ///
+    enum PointKind {
+      Loop,    //< Instr is a loop (backwards branch).
+      Return,  //< Instr is a return instruction.
+      PreCall, //< Instr is a call instruction.
+      PostCall //< Instr is the return address of a call.
+    };
+  }
+
+A collector can request any combination of the four by setting the
+``NeededSafePoints`` mask:
+
+.. code-block:: c++
+
+  MyGC::MyGC()  {
+    NeededSafePoints = 1 << GC::Loop
+                     | 1 << GC::Return
+                     | 1 << GC::PreCall
+                     | 1 << GC::PostCall;
+  }
+
+It can then use the following routines to access safe points.
+
+.. code-block:: c++
+
+  for (iterator I = begin(), E = end(); I != E; ++I) {
+    GCFunctionInfo *MD = *I;
+    size_t PointCount = MD->size();
+
+    for (GCFunctionInfo::iterator PI = MD->begin(),
+                                  PE = MD->end(); PI != PE; ++PI) {
+      GC::PointKind PointKind = PI->Kind;
+      unsigned PointNum = PI->Num;
+    }
+  }
+
+Almost every collector requires ``PostCall`` safe points, since these correspond
+to the moments when the function is suspended during a call to a subroutine.
+
+Threaded programs generally require ``Loop`` safe points to guarantee that the
+application will reach a safe point within a bounded amount of time, even if it
+is executing a long-running loop which contains no function calls.
+
+Threaded collectors may also require ``Return`` and ``PreCall`` safe points to
+implement "stop the world" techniques using self-modifying code, where it is
+important that the program not exit the function without reaching a safe point
+(because only the topmost function has been patched).
+
+.. _assembly:
+
+Emitting assembly code: ``GCMetadataPrinter``
+---------------------------------------------
+
+LLVM allows a plugin to print arbitrary assembly code before and after the rest
+of a module's assembly code.  At the end of the module, the GC can compile the
+LLVM stack map into assembly code. (At the beginning, this information is not
+yet computed.)
+
+Since AsmWriter and CodeGen are separate components of LLVM, a separate abstract
+base class and registry is provided for printing assembly code, the
+``GCMetadaPrinter`` and ``GCMetadataPrinterRegistry``.  The AsmWriter will look
+for such a subclass if the ``GCStrategy`` sets ``UsesMetadata``:
+
+.. code-block:: c++
+
+  MyGC::MyGC() {
+    UsesMetadata = true;
+  }
+
+This separation allows JIT-only clients to be smaller.
+
+Note that LLVM does not currently have analogous APIs to support code generation
+in the JIT, nor using the object writers.
+
+.. code-block:: c++
+
+  // lib/MyGC/MyGCPrinter.cpp - Example LLVM GC printer
+
+  #include "llvm/CodeGen/GCMetadataPrinter.h"
+  #include "llvm/Support/Compiler.h"
+
+  using namespace llvm;
+
+  namespace {
+    class LLVM_LIBRARY_VISIBILITY MyGCPrinter : public GCMetadataPrinter {
+    public:
+      virtual void beginAssembly(std::ostream &OS, AsmPrinter &AP,
+                                 const TargetAsmInfo &TAI);
+
+      virtual void finishAssembly(std::ostream &OS, AsmPrinter &AP,
+                                  const TargetAsmInfo &TAI);
+    };
+
+    GCMetadataPrinterRegistry::Add<MyGCPrinter>
+    X("mygc", "My bespoke garbage collector.");
+  }
+
+The collector should use ``AsmPrinter`` and ``TargetAsmInfo`` to print portable
+assembly code to the ``std::ostream``.  The collector itself contains the stack
+map for the entire module, and may access the ``GCFunctionInfo`` using its own
+``begin()`` and ``end()`` methods.  Here's a realistic example:
+
+.. code-block:: c++
+
+  #include "llvm/CodeGen/AsmPrinter.h"
+  #include "llvm/Function.h"
+  #include "llvm/Target/TargetMachine.h"
+  #include "llvm/DataLayout.h"
+  #include "llvm/Target/TargetAsmInfo.h"
+
+  void MyGCPrinter::beginAssembly(std::ostream &OS, AsmPrinter &AP,
+                                  const TargetAsmInfo &TAI) {
+    // Nothing to do.
+  }
+
+  void MyGCPrinter::finishAssembly(std::ostream &OS, AsmPrinter &AP,
+                                   const TargetAsmInfo &TAI) {
+    // Set up for emitting addresses.
+    const char *AddressDirective;
+    int AddressAlignLog;
+    if (AP.TM.getDataLayout()->getPointerSize() == sizeof(int32_t)) {
+      AddressDirective = TAI.getData32bitsDirective();
+      AddressAlignLog = 2;
+    } else {
+      AddressDirective = TAI.getData64bitsDirective();
+      AddressAlignLog = 3;
+    }
+
+    // Put this in the data section.
+    AP.SwitchToDataSection(TAI.getDataSection());
+
+    // For each function...
+    for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+      GCFunctionInfo &MD = **FI;
+
+      // Emit this data structure:
+      //
+      // struct {
+      //   int32_t PointCount;
+      //   struct {
+      //     void *SafePointAddress;
+      //     int32_t LiveCount;
+      //     int32_t LiveOffsets[LiveCount];
+      //   } Points[PointCount];
+      // } __gcmap_<FUNCTIONNAME>;
+
+      // Align to address width.
+      AP.EmitAlignment(AddressAlignLog);
+
+      // Emit the symbol by which the stack map entry can be found.
+      std::string Symbol;
+      Symbol += TAI.getGlobalPrefix();
+      Symbol += "__gcmap_";
+      Symbol += MD.getFunction().getName();
+      if (const char *GlobalDirective = TAI.getGlobalDirective())
+        OS << GlobalDirective << Symbol << "\n";
+      OS << TAI.getGlobalPrefix() << Symbol << ":\n";
+
+      // Emit PointCount.
+      AP.EmitInt32(MD.size());
+      AP.EOL("safe point count");
+
+      // And each safe point...
+      for (GCFunctionInfo::iterator PI = MD.begin(),
+                                       PE = MD.end(); PI != PE; ++PI) {
+        // Align to address width.
+        AP.EmitAlignment(AddressAlignLog);
+
+        // Emit the address of the safe point.
+        OS << AddressDirective
+           << TAI.getPrivateGlobalPrefix() << "label" << PI->Num;
+        AP.EOL("safe point address");
+
+        // Emit the stack frame size.
+        AP.EmitInt32(MD.getFrameSize());
+        AP.EOL("stack frame size");
+
+        // Emit the number of live roots in the function.
+        AP.EmitInt32(MD.live_size(PI));
+        AP.EOL("live root count");
+
+        // And for each live root...
+        for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
+                                           LE = MD.live_end(PI);
+                                           LI != LE; ++LI) {
+          // Print its offset within the stack frame.
+          AP.EmitInt32(LI->StackOffset);
+          AP.EOL("stack offset");
+        }
+      }
+    }
+  }
+
+References
+==========
+
+.. _appel89:
+
+[Appel89] Runtime Tags Aren't Necessary. Andrew W. Appel. Lisp and Symbolic
+Computation 19(7):703-705, July 1989.
+
+.. _goldberg91:
+
+[Goldberg91] Tag-free garbage collection for strongly typed programming
+languages. Benjamin Goldberg. ACM SIGPLAN PLDI'91.
+
+.. _tolmach94:
+
+[Tolmach94] Tag-free garbage collection using explicit type parameters. Andrew
+Tolmach. Proceedings of the 1994 ACM conference on LISP and functional
+programming.
+
+.. _henderson02:
+
+[Henderson2002] `Accurate Garbage Collection in an Uncooperative Environment
+<http://citeseer.ist.psu.edu/henderson02accurate.html>`__
+
diff --git a/docs/GetElementPtr.rst b/docs/GetElementPtr.rst
index f6f904b2e3..3b57d78cf1 100644
--- a/docs/GetElementPtr.rst
+++ b/docs/GetElementPtr.rst
@@ -22,7 +22,7 @@ Address Computation
 When people are first confronted with the GEP instruction, they tend to relate
 it to known concepts from other programming paradigms, most notably C array
 indexing and field selection. GEP closely resembles C array indexing and field
-selection, however it's is a little different and this leads to the following
+selection, however it is a little different and this leads to the following
 questions.
 
 What is the first index of the GEP instruction?
@@ -190,7 +190,7 @@ In this example, we have a global variable, ``%MyVar`` that is a pointer to a
 structure containing a pointer to an array of 40 ints. The GEP instruction seems
 to be accessing the 18th integer of the structure's array of ints. However, this
 is actually an illegal GEP instruction. It won't compile. The reason is that the
-pointer in the structure <i>must</i> be dereferenced in order to index into the
+pointer in the structure *must* be dereferenced in order to index into the
 array of 40 ints. Since the GEP instruction never accesses memory, it is
 illegal.
 
@@ -416,7 +416,7 @@ arithmetic, and inttoptr sequences.
 Can I compute the distance between two objects, and add that value to one address to compute the other address?
 ---------------------------------------------------------------------------------------------------------------
 
-As with arithmetic on null, You can use GEP to compute an address that way, but
+As with arithmetic on null, you can use GEP to compute an address that way, but
 you can't use that pointer to actually access the object if you do, unless the
 object is managed outside of LLVM.
 
diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst
index f17313506c..8902684c98 100644
--- a/docs/GettingStarted.rst
+++ b/docs/GettingStarted.rst
@@ -94,7 +94,7 @@ Here's the short story for getting up and running quickly with LLVM:
      running ``svn update``.
 
    * It is also possible to use CMake instead of the makefiles. With CMake it is
-     also possible to generate project files for several IDEs: Eclipse CDT4,
+     possible to generate project files for several IDEs: Xcode, Eclipse CDT4,
      CodeBlocks, Qt-Creator (use the CodeBlocks generator), KDevelop3.
 
    * If you get an "internal compiler error (ICE)" or test failures, see
diff --git a/docs/HowToReleaseLLVM.html b/docs/HowToReleaseLLVM.html
deleted file mode 100644
index 30c3d5da5e..0000000000
--- a/docs/HowToReleaseLLVM.html
+++ /dev/null
@@ -1,581 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <title>How To Release LLVM To The Public</title>
-  <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>How To Release LLVM To The Public</h1>
-<ol>
-  <li><a href="#introduction">Introduction</a></li>
-  <li><a href="#criteria">Qualification Criteria</a></li>
-  <li><a href="#introduction">Release Timeline</a></li>
-  <li><a href="#process">Release Process</a></li>
-</ol>
-<div class="doc_author">
-  <p>Written by <a href="mailto:tonic@nondot.org">Tanya Lattner</a>,
-  <a href="mailto:rspencer@x10sys.com">Reid Spencer</a>,
-  <a href="mailto:criswell@cs.uiuc.edu">John Criswell</a>, &amp;
-  <a href="mailto:wendling@apple.com">Bill Wendling</a>
-  </p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="introduction">Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This document contains information about successfully releasing LLVM &mdash;
-   including subprojects: e.g., <tt>clang</tt> and <tt>dragonegg</tt> &mdash; to
-   the public. It is the Release Manager's responsibility to ensure that a high
-   quality build of LLVM is released.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="process">Release Timeline</a></h2>
-<!-- *********************************************************************** -->
-<div>
-
-<p>LLVM is released on a time based schedule &mdash; roughly every 6 months. We
-   do not normally have dot releases because of the nature of LLVM's incremental
-   development philosophy. That said, the only thing preventing dot releases for
-   critical bug fixes from happening is a lack of resources &mdash; testers,
-   machines, time, etc. And, because of the high quality we desire for LLVM
-   releases, we cannot allow for a truncated form of release qualification.</p>
-
-<p>The release process is roughly as follows:</p>
-
-<ul>
-  <li><p>Set code freeze and branch creation date for 6 months after last code
-      freeze date. Announce release schedule to the LLVM community and update
-      the website.</p></li>
-
-  <li><p>Create release branch and begin release process.</p></li>
-
-  <li><p>Send out release candidate sources for first round of testing. Testing
-      lasts 7-10 days. During the first round of testing, any regressions found
-      should be fixed. Patches are merged from mainline into the release
-      branch. Also, all features need to be completed during this time. Any
-      features not completed at the end of the first round of testing will be
-      removed or disabled for the release.</p></li>
-
-  <li><p>Generate and send out the second release candidate sources. Only
-      <em>critial</em> bugs found during this testing phase will be fixed. Any
-      bugs introduced by merged patches will be fixed. If so a third round of
-      testing is needed.</p></li>
-
-  <li><p>The release notes are updated.</p></li>
-
-  <li><p>Finally, release!</p></li>
-</ul>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="process">Release Process</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<ol>
-  <li><a href="#release-admin">Release Administrative Tasks</a>
-  <ol>
-    <li><a href="#branch">Create Release Branch</a></li>
-    <li><a href="#verchanges">Update Version Numbers</a></li>
-  </ol>
-  </li>
-  <li><a href="#release-build">Building the Release</a>
-  <ol>
-    <li><a href="#dist">Build the LLVM Source Distributions</a></li>
-    <li><a href="#build">Build LLVM</a></li>
-    <li><a href="#clangbin">Build the Clang Binary Distribution</a></li>
-    <li><a href="#target-build">Target Specific Build Details</a></li>
-  </ol>
-  </li>
-  <li><a href="#release-qualify">Release Qualification Criteria</a>
-  <ol>
-    <li><a href="#llvm-qualify">Qualify LLVM</a></li>
-    <li><a href="#clang-qualify">Qualify Clang</a></li>
-    <li><a href="#targets">Specific Target Qualification Details</a></li>
-  </ol>
-  </li>
-
-  <li><a href="#commTest">Community Testing</a></li>    
-  <li><a href="#release-patch">Release Patch Rules</a></li>
-  <li><a href="#release-final">Release final tasks</a>
-  <ol>
-    <li><a href="#updocs">Update Documentation</a></li>
-    <li><a href="#tag">Tag the LLVM Final Release</a></li>
-    <li><a href="#updemo">Update the LLVM Demo Page</a></li>
-    <li><a href="#webupdates">Update the LLVM Website</a></li>
-    <li><a href="#announce">Announce the Release</a></li>
-  </ol>
-  </li>
-</ol>
-
-<!-- ======================================================================= -->
-<h3><a name="release-admin">Release Administrative Tasks</a></h3>
-
-<div>
-
-<p>This section describes a few administrative tasks that need to be done for
-   the release process to begin. Specifically, it involves:</p>
-
-<ul>
-  <li>Creating the release branch,</li>
-  <li>Setting version numbers, and</li>
-  <li>Tagging release candidates for the release team to begin testing</li>
-</ul>
-
-<!-- ======================================================================= -->
-<h4><a name="branch">Create Release Branch</a></h4>
-
-<div>
-
-<p>Branch the Subversion trunk using the following procedure:</p>
-
-<ol>
-  <li><p>Remind developers that the release branching is imminent and to refrain
-      from committing patches that might break the build. E.g., new features,
-      large patches for works in progress, an overhaul of the type system, an
-      exciting new TableGen feature, etc.</p></li>
-
-  <li><p>Verify that the current Subversion trunk is in decent shape by
-      examining nightly tester and buildbot results.</p></li>
-
-  <li><p>Create the release branch for <tt>llvm</tt>, <tt>clang</tt>,
-      the <tt>test-suite</tt>, and <tt>dragonegg</tt> from the last known good
-      revision. The branch's name is <tt>release_<i>XY</i></tt>,
-      where <tt>X</tt> is the major and <tt>Y</tt> the minor release
-      numbers. The branches should be created using the following commands:</p>
-  
-<div class="doc_code">
-<pre>
-$ svn copy https://llvm.org/svn/llvm-project/llvm/trunk \
-           https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XY</i>
-
-$ svn copy https://llvm.org/svn/llvm-project/cfe/trunk \
-           https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XY</i>
-
-$ svn copy https://llvm.org/svn/llvm-project/dragonegg/trunk \
-           https://llvm.org/svn/llvm-project/dragonegg/branches/release_<i>XY</i>
-
-$ svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \
-           https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XY</i>
-</pre>
-</div></li>
-
-  <li><p>Advise developers that they may now check their patches into the
-      Subversion tree again.</p></li>
-
-  <li><p>The Release Manager should switch to the release branch, because all
-      changes to the release will now be done in the branch. The easiest way to
-      do this is to grab a working copy using the following commands:</p>
-
-<div class="doc_code">
-<pre>
-$ svn co https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XY</i> llvm-<i>X.Y</i>
-
-$ svn co https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XY</i> clang-<i>X.Y</i>
-
-$ svn co https://llvm.org/svn/llvm-project/dragonegg/branches/release_<i>XY</i> dragonegg-<i>X.Y</i>
-
-$ svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XY</i> test-suite-<i>X.Y</i>
-</pre>
-</div></li>
-</ol>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="verchanges">Update LLVM Version</a></h4>
-
-<div>
-
-<p>After creating the LLVM release branch, update the release branches'
-   <tt>autoconf</tt> and <tt>configure.ac</tt> versions from '<tt>X.Ysvn</tt>'
-   to '<tt>X.Y</tt>'. Update it on mainline as well to be the next version
-   ('<tt>X.Y+1svn</tt>'). Regenerate the configure scripts for both
-   <tt>llvm</tt> and the <tt>test-suite</tt>.</p>
-
-<p>In addition, the version numbers of all the Bugzilla components must be
-   updated for the next release.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="dist">Build the LLVM Release Candidates</a></h4>
-
-<div>
-
-<p>Create release candidates for <tt>llvm</tt>, <tt>clang</tt>,
-   <tt>dragonegg</tt>, and the LLVM <tt>test-suite</tt> by tagging the branch
-   with the respective release candidate number. For instance, to
-   create <b>Release Candidate 1</b> you would issue the following commands:</p>
-
-<div class="doc_code">
-<pre>
-$ svn mkdir https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XY</i>
-$ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_<i>XY</i> \
-           https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XY</i>/rc1
-
-$ svn mkdir https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_<i>XY</i>
-$ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_<i>XY</i> \
-           https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_<i>XY</i>/rc1
-
-$ svn mkdir https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_<i>XY</i>
-$ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_<i>XY</i> \
-           https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_<i>XY</i>/rc1
-
-$ svn mkdir https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XY</i>
-$ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_<i>XY</i> \
-           https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XY</i>/rc1
-</pre>
-</div>
-
-<p>Similarly, <b>Release Candidate 2</b> would be named <tt>RC2</tt> and so
-   on. This keeps a permanent copy of the release candidate around for people to
-   export and build as they wish. The final released sources will be tagged in
-   the <tt>RELEASE_<i>XY</i></tt> directory as <tt>Final</tt>
-   (c.f. <a href="#tag">Tag the LLVM Final Release</a>).</p>
-
-<p>The Release Manager may supply pre-packaged source tarballs for users. This
-   can be done with the following commands:</p>
-
-<div class="doc_code">
-<pre>
-$ svn export https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XY</i>/rc1 llvm-<i>X.Y</i>rc1
-$ svn export https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_<i>XY</i>/rc1 clang-<i>X.Y</i>rc1
-$ svn export https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_<i>XY</i>/rc1 dragonegg-<i>X.Y</i>rc1
-$ svn export https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XY</i>/rc1 llvm-test-<i>X.Y</i>rc1
-
-$ tar -cvf - llvm-<i>X.Y</i>rc1        | gzip &gt; llvm-<i>X.Y</i>rc1.src.tar.gz
-$ tar -cvf - clang-<i>X.Y</i>rc1       | gzip &gt; clang-<i>X.Y</i>rc1.src.tar.gz
-$ tar -cvf - dragonegg-<i>X.Y</i>rc1   | gzip &gt; dragonegg-<i>X.Y</i>rc1.src.tar.gz
-$ tar -cvf - llvm-test-<i>X.Y</i>rc1   | gzip &gt; llvm-test-<i>X.Y</i>rc1.src.tar.gz
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="release-build">Building the Release</a></h3>
-
-<div>
-
-<p>The builds of <tt>llvm</tt>, <tt>clang</tt>, and <tt>dragonegg</tt>
-   <em>must</em> be free of errors and warnings in Debug, Release+Asserts, and
-   Release builds. If all builds are clean, then the release passes Build
-   Qualification.</p>
-
-<p>The <tt>make</tt> options for building the different modes:</p>
-
-<table>
-  <tr><th>Mode</th><th>Options</th></tr>
-  <tr align="left"><td>Debug</td><td><tt>ENABLE_OPTIMIZED=0</tt></td></tr>
-  <tr align="left"><td>Release+Asserts</td><td><tt>ENABLE_OPTIMIZED=1</tt></td></tr>
-  <tr align="left"><td>Release</td><td><tt>ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1</tt></td></tr>
-</table>
-
-<!-- ======================================================================= -->
-<h4><a name="build">Build LLVM</a></h4>
-
-<div>
-
-<p>Build <tt>Debug</tt>, <tt>Release+Asserts</tt>, and <tt>Release</tt> versions
-   of <tt>llvm</tt> on all supported platforms. Directions to build
-   <tt>llvm</tt> are <a href="GettingStarted.html#quickstart">here</a>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="clangbin">Build Clang Binary Distribution</a></h4>
-
-<div>
-
-<p>Creating the <tt>clang</tt> binary distribution
-   (Debug/Release+Asserts/Release) requires performing the following steps for
-   each supported platform:</p>
-
-<ol>
-  <li>Build clang according to the directions
-      <a href="http://clang.llvm.org/get_started.html">here</a>.</li>
-
-  <li>Build both a Debug and Release version of clang. The binary will be the
-      Release build.</lI>
-
-  <li>Package <tt>clang</tt> (details to follow).</li>
-</ol>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="target-build">Target Specific Build Details</a></h4>
-
-<div>
-
-<p>The table below specifies which compilers are used for each Arch/OS
-   combination when qualifying the build of <tt>llvm</tt>, <tt>clang</tt>,
-   and <tt>dragonegg</tt>.</p>
-
-<table>
-  <tr><th>Architecture</th> <th>OS</th>          <th>compiler</th></tr>
-  <tr><td>x86-32</td>       <td>Mac OS 10.5</td> <td>gcc 4.0.1</td></tr>
-  <tr><td>x86-32</td>       <td>Linux</td>       <td>gcc 4.2.X, gcc 4.3.X</td></tr>
-  <tr><td>x86-32</td>       <td>FreeBSD</td>     <td>gcc 4.2.X</td></tr>
-  <tr><td>x86-32</td>       <td>mingw</td>       <td>gcc 3.4.5</td></tr>
-  <tr><td>x86-64</td>       <td>Mac OS 10.5</td> <td>gcc 4.0.1</td></tr>
-  <tr><td>x86-64</td>       <td>Linux</td>       <td>gcc 4.2.X, gcc 4.3.X</td></tr>
-  <tr><td>x86-64</td>       <td>FreeBSD</td>     <td>gcc 4.2.X</td></tr>
-</table> 
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="release-qualify">Building the Release</a></h3>
-
-<div>
-
-<p>A release is qualified when it has no regressions from the previous release
-   (or baseline). Regressions are related to correctness first and performance
-   second. (We may tolerate some minor performance regressions if they are
-   deemed necessary for the general quality of the compiler.)</p>
-
-<p><b>Regressions are new failures in the set of tests that are used to qualify
-   each product and only include things on the list. Every release will have
-   some bugs in it. It is the reality of developing a complex piece of
-   software. We need a very concrete and definitive release criteria that
-   ensures we have monotonically improving quality on some metric. The metric we
-   use is described below. This doesn't mean that we don't care about other
-   criteria, but these are the criteria which we found to be most important and
-   which must be satisfied before a release can go out</b></p>
-
-<!-- ======================================================================= -->
-<h4><a name="llvm-qualify">Qualify LLVM</a></h4>
-
-<div>
-
-<p>LLVM is qualified when it has a clean test run without a front-end. And it
-   has no regressions when using either <tt>clang</tt> or <tt>dragonegg</tt>
-   with the <tt>test-suite</tt> from the previous release.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="clang-qualify">Qualify Clang</a></h4>
-
-<div>
-
-<p><tt>Clang</tt> is qualified when front-end specific tests in the 
-   <tt>llvm</tt> dejagnu test suite all pass, clang's own test suite passes
-   cleanly, and there are no regressions in the <tt>test-suite</tt>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="targets">Specific Target Qualification Details</a></h4>
-
-<div>
-
-<table>
-  <tr><th>Architecture</th> <th>OS</th>          <th>clang baseline</th> <th>tests</th></tr>
-  <tr><td>x86-32</td>       <td>Linux</td>       <td>last release</td>   <td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
-  <tr><td>x86-32</td>       <td>FreeBSD</td>     <td>last release</td>   <td>llvm dejagnu, clang tests, test-suite</td></tr>
-  <tr><td>x86-32</td>       <td>mingw</td>       <td>none</td>           <td>QT</td></tr>
-  <tr><td>x86-64</td>       <td>Mac OS 10.X</td> <td>last release</td>   <td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
-  <tr><td>x86-64</td>       <td>Linux</td>       <td>last release</td>   <td>llvm dejagnu, clang tests, test-suite (including spec)</td></tr>
-  <tr><td>x86-64</td>       <td>FreeBSD</td>     <td>last release</td>   <td>llvm dejagnu, clang tests, test-suite</td></tr>
-</table>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="commTest">Community Testing</a></h3>
-<div>
-
-<p>Once all testing has been completed and appropriate bugs filed, the release
-   candidate tarballs are put on the website and the LLVM community is
-   notified. Ask that all LLVM developers test the release in 2 ways:</p>
-
-<ol>
-  <li>Download <tt>llvm-<i>X.Y</i></tt>, <tt>llvm-test-<i>X.Y</i></tt>, and the
-      appropriate <tt>clang</tt> binary. Build LLVM. Run <tt>make check</tt> and
-      the full LLVM test suite (<tt>make TEST=nightly report</tt>).</li>
-
-  <li>Download <tt>llvm-<i>X.Y</i></tt>, <tt>llvm-test-<i>X.Y</i></tt>, and the
-      <tt>clang</tt> sources. Compile everything. Run <tt>make check</tt> and
-      the full LLVM test suite (<tt>make TEST=nightly report</tt>).</li>
-</ol>
-
-<p>Ask LLVM developers to submit the test suite report and <tt>make check</tt>
-   results to the list. Verify that there are no regressions from the previous
-   release. The results are not used to qualify a release, but to spot other
-   potential problems. For unsupported targets, verify that <tt>make check</tt>
-   is at least clean.</p>
-  
-<p>During the first round of testing, all regressions must be fixed before the
-   second release candidate is tagged.</p>
-  
-<p>If this is the second round of testing, the testing is only to ensure that
-   bug fixes previously merged in have not created new major problems. <i>This
-   is not the time to solve additional and unrelated bugs!</i> If no patches are
-   merged in, the release is determined to be ready and the release manager may
-   move onto the next stage.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="release-patch">Release Patch Rules</a></h3>
-
-<div>
-
-<p>Below are the rules regarding patching the release branch:</p>
-
-<ol>
-  <li><p>Patches applied to the release branch may only be applied by the
-      release manager.</p></li>
-
-  <li><p>During the first round of testing, patches that fix regressions or that
-      are small and relatively risk free (verified by the appropriate code
-      owner) are applied to the branch. Code owners are asked to be very
-      conservative in approving patches for the branch. We reserve the right to
-      reject any patch that does not fix a regression as previously
-      defined.</p></li>
-
-  <li><p>During the remaining rounds of testing, only patches that fix critical
-      regressions may be applied.</p></li>
-</ol>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="release-final">Release Final Tasks</a></h3>
-
-<div>
-
-<p>The final stages of the release process involves tagging the "final" release
-   branch, updating documentation that refers to the release, and updating the
-   demo page.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="updocs">Update Documentation</a></h4>
-
-<div>
-
-<p>Review the documentation and ensure that it is up to date. The "Release
-   Notes" must be updated to reflect new features, bug fixes, new known issues,
-   and changes in the list of supported platforms. The "Getting Started Guide"
-   should be updated to reflect the new release version number tag available from
-   Subversion and changes in basic system requirements. Merge both changes from
-   mainline into the release branch.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="tag">Tag the LLVM Final Release</a></h4>
-
-<div>
-
-<p>Tag the final release sources using the following procedure:</p>
-
-<div class="doc_code">
-<pre>
-$ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
-           https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_<i>XY</i>/Final
-
-$ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
-           https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_<i>XY</i>/Final
-
-$ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \
-           https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_<i>XY</i>/Final
-
-$ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
-           https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_<i>XY</i>/Final
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3><a name="updemo">Update the LLVM Demo Page</a></h3>
-
-<div>
-
-<p>The LLVM demo page must be updated to use the new release. This consists of
-   using the new <tt>clang</tt> binary and building LLVM.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="webupdates">Update the LLVM Website</a></h4>
-
-<div>
-
-<p>The website must be updated before the release announcement is sent out. Here
-   is what to do:</p>
-
-<ol>
-  <li>Check out the <tt>www</tt> module from Subversion.</li>
-
-  <li>Create a new subdirectory <tt>X.Y</tt> in the releases directory.</li>
-
-  <li>Commit the <tt>llvm</tt>, <tt>test-suite</tt>, <tt>clang</tt> source,
-      <tt>clang binaries</tt>, <tt>dragonegg</tt> source, and <tt>dragonegg</tt>
-      binaries in this new directory.</li>
-
-  <li>Copy and commit the <tt>llvm/docs</tt> and <tt>LICENSE.txt</tt> files
-      into this new directory. The docs should be built with
-      <tt>BUILD_FOR_WEBSITE=1</tt>.</li>
-
-  <li>Commit the <tt>index.html</tt> to the <tt>release/X.Y</tt> directory to
-      redirect (use from previous release.</li>
-
-  <li>Update the <tt>releases/download.html</tt> file with the new release.</li>
-
-  <li>Update the <tt>releases/index.html</tt> with the new release and link to
-      release documentation.</li>
-
-  <li>Finally, update the main page (<tt>index.html</tt> and sidebar) to point
-      to the new release and release announcement. Make sure this all gets
-      committed back into Subversion.</li>
-</ol>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="announce">Announce the Release</a></h4>
-
-<div>
-
-<p>Have Chris send out the release announcement when everything is finished.</p>
-
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
-  <br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/HowToReleaseLLVM.rst b/docs/HowToReleaseLLVM.rst
new file mode 100644
index 0000000000..eb6c838a21
--- /dev/null
+++ b/docs/HowToReleaseLLVM.rst
@@ -0,0 +1,427 @@
+=================================
+How To Release LLVM To The Public
+=================================
+
+.. contents::
+   :local:
+   :depth: 1
+
+.. sectionauthor:: Tanya Lattner <tonic@nondot.org>,
+                   Reid Spencer <rspencer@x10sys.com>,
+                   John Criswell <criswell@cs.uiuc.edu> and
+                   Bill Wendling <wendling@apple.com>
+
+Introduction
+============
+
+This document contains information about successfully releasing LLVM ---
+including subprojects: e.g., ``clang`` and ``dragonegg`` --- to the public.  It
+is the Release Manager's responsibility to ensure that a high quality build of
+LLVM is released.
+
+.. _timeline:
+
+Release Timeline
+================
+
+LLVM is released on a time based schedule --- roughly every 6 months.  We do
+not normally have dot releases because of the nature of LLVM's incremental
+development philosophy.  That said, the only thing preventing dot releases for
+critical bug fixes from happening is a lack of resources --- testers,
+machines, time, etc.  And, because of the high quality we desire for LLVM
+releases, we cannot allow for a truncated form of release qualification.
+
+The release process is roughly as follows:
+
+* Set code freeze and branch creation date for 6 months after last code freeze
+  date.  Announce release schedule to the LLVM community and update the website.
+
+* Create release branch and begin release process.
+
+* Send out release candidate sources for first round of testing.  Testing lasts
+  7-10 days.  During the first round of testing, any regressions found should be
+  fixed.  Patches are merged from mainline into the release branch.  Also, all
+  features need to be completed during this time.  Any features not completed at
+  the end of the first round of testing will be removed or disabled for the
+  release.
+
+* Generate and send out the second release candidate sources.  Only *critial*
+  bugs found during this testing phase will be fixed.  Any bugs introduced by
+  merged patches will be fixed.  If so a third round of testing is needed.
+
+* The release notes are updated.
+
+* Finally, release!
+
+Release Process
+===============
+
+.. contents::
+   :local:
+
+Release Administrative Tasks
+----------------------------
+
+This section describes a few administrative tasks that need to be done for the
+release process to begin.  Specifically, it involves:
+
+* Creating the release branch,
+
+* Setting version numbers, and
+
+* Tagging release candidates for the release team to begin testing.
+
+Create Release Branch
+^^^^^^^^^^^^^^^^^^^^^
+
+Branch the Subversion trunk using the following procedure:
+
+#. Remind developers that the release branching is imminent and to refrain from
+   committing patches that might break the build.  E.g., new features, large
+   patches for works in progress, an overhaul of the type system, an exciting
+   new TableGen feature, etc.
+
+#. Verify that the current Subversion trunk is in decent shape by
+   examining nightly tester and buildbot results.
+
+#. Create the release branch for ``llvm``, ``clang``, the ``test-suite``, and
+   ``dragonegg`` from the last known good revision.  The branch's name is
+   ``release_XY``, where ``X`` is the major and ``Y`` the minor release
+   numbers.  The branches should be created using the following commands:
+
+   ::
+
+     $ svn copy https://llvm.org/svn/llvm-project/llvm/trunk \
+                https://llvm.org/svn/llvm-project/llvm/branches/release_XY
+
+     $ svn copy https://llvm.org/svn/llvm-project/cfe/trunk \
+                https://llvm.org/svn/llvm-project/cfe/branches/release_XY
+
+     $ svn copy https://llvm.org/svn/llvm-project/dragonegg/trunk \
+                https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY
+
+     $ svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \
+                https://llvm.org/svn/llvm-project/test-suite/branches/release_XY
+
+#. Advise developers that they may now check their patches into the Subversion
+   tree again.
+
+#. The Release Manager should switch to the release branch, because all changes
+   to the release will now be done in the branch.  The easiest way to do this is
+   to grab a working copy using the following commands:
+
+   ::
+
+     $ svn co https://llvm.org/svn/llvm-project/llvm/branches/release_XY llvm-X.Y
+
+     $ svn co https://llvm.org/svn/llvm-project/cfe/branches/release_XY clang-X.Y
+
+     $ svn co https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY dragonegg-X.Y
+
+     $ svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_XY test-suite-X.Y
+
+Update LLVM Version
+^^^^^^^^^^^^^^^^^^^
+
+After creating the LLVM release branch, update the release branches'
+``autoconf`` and ``configure.ac`` versions from '``X.Ysvn``' to '``X.Y``'.
+Update it on mainline as well to be the next version ('``X.Y+1svn``').
+Regenerate the configure scripts for both ``llvm`` and the ``test-suite``.
+
+In addition, the version numbers of all the Bugzilla components must be updated
+for the next release.
+
+Build the LLVM Release Candidates
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Create release candidates for ``llvm``, ``clang``, ``dragonegg``, and the LLVM
+``test-suite`` by tagging the branch with the respective release candidate
+number.  For instance, to create **Release Candidate 1** you would issue the
+following commands:
+
+::
+
+  $ svn mkdir https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY
+  $ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
+             https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/rc1
+
+  $ svn mkdir https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY
+  $ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
+             https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/rc1
+
+  $ svn mkdir https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY
+  $ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \
+             https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY/rc1
+
+  $ svn mkdir https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY
+  $ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
+             https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/rc1
+
+Similarly, **Release Candidate 2** would be named ``RC2`` and so on.  This keeps
+a permanent copy of the release candidate around for people to export and build
+as they wish.  The final released sources will be tagged in the ``RELEASE_XY``
+directory as ``Final`` (c.f. :ref:`tag`).
+
+The Release Manager may supply pre-packaged source tarballs for users.  This can
+be done with the following commands:
+
+::
+
+  $ svn export https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/rc1 llvm-X.Yrc1
+  $ svn export https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/rc1 clang-X.Yrc1
+  $ svn export https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY/rc1 dragonegg-X.Yrc1
+  $ svn export https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/rc1 llvm-test-X.Yrc1
+
+  $ tar -cvf - llvm-X.Yrc1        | gzip > llvm-X.Yrc1.src.tar.gz
+  $ tar -cvf - clang-X.Yrc1       | gzip > clang-X.Yrc1.src.tar.gz
+  $ tar -cvf - dragonegg-X.Yrc1   | gzip > dragonegg-X.Yrc1.src.tar.gz
+  $ tar -cvf - llvm-test-X.Yrc1   | gzip > llvm-test-X.Yrc1.src.tar.gz
+
+Building the Release
+--------------------
+
+The builds of ``llvm``, ``clang``, and ``dragonegg`` *must* be free of
+errors and warnings in Debug, Release+Asserts, and Release builds.  If all
+builds are clean, then the release passes Build Qualification.
+
+The ``make`` options for building the different modes:
+
++-----------------+---------------------------------------------+
+| Mode            | Options                                     |
++=================+=============================================+
+| Debug           | ``ENABLE_OPTIMIZED=0``                      |
++-----------------+---------------------------------------------+
+| Release+Asserts | ``ENABLE_OPTIMIZED=1``                      |
++-----------------+---------------------------------------------+
+| Release         | ``ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1`` |
++-----------------+---------------------------------------------+
+
+Build LLVM
+^^^^^^^^^^
+
+Build ``Debug``, ``Release+Asserts``, and ``Release`` versions
+of ``llvm`` on all supported platforms.  Directions to build ``llvm``
+are :ref:`here <getting_started>`.
+
+Build Clang Binary Distribution
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Creating the ``clang`` binary distribution (Debug/Release+Asserts/Release)
+requires performing the following steps for each supported platform:
+
+#. Build clang according to the directions `here
+   <http://clang.llvm.org/get_started.html>`__.
+
+#. Build both a Debug and Release version of clang.  The binary will be the
+   Release build.
+
+#. Package ``clang`` (details to follow).
+
+Target Specific Build Details
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The table below specifies which compilers are used for each Arch/OS combination
+when qualifying the build of ``llvm``, ``clang``, and ``dragonegg``.
+
++--------------+---------------+----------------------+
+| Architecture | OS            | compiler             |
++==============+===============+======================+
+| x86-32       | Mac OS 10.5   | gcc 4.0.1            |
++--------------+---------------+----------------------+
+| x86-32       | Linux         | gcc 4.2.X, gcc 4.3.X |
++--------------+---------------+----------------------+
+| x86-32       | FreeBSD       | gcc 4.2.X            |
++--------------+---------------+----------------------+
+| x86-32       | mingw         | gcc 3.4.5            |
++--------------+---------------+----------------------+
+| x86-64       | Mac OS 10.5   | gcc 4.0.1            |
++--------------+---------------+----------------------+
+| x86-64       | Linux         | gcc 4.2.X, gcc 4.3.X |
++--------------+---------------+----------------------+
+| x86-64       | FreeBSD       | gcc 4.2.X            |
++--------------+---------------+----------------------+
+
+Release Qualification Criteria
+------------------------------
+
+A release is qualified when it has no regressions from the previous release (or
+baseline).  Regressions are related to correctness first and performance second.
+(We may tolerate some minor performance regressions if they are deemed
+necessary for the general quality of the compiler.)
+
+**Regressions are new failures in the set of tests that are used to qualify
+each product and only include things on the list.  Every release will have
+some bugs in it.  It is the reality of developing a complex piece of
+software.  We need a very concrete and definitive release criteria that
+ensures we have monotonically improving quality on some metric.  The metric we
+use is described below.  This doesn't mean that we don't care about other
+criteria, but these are the criteria which we found to be most important and
+which must be satisfied before a release can go out.**
+
+Qualify LLVM
+^^^^^^^^^^^^
+
+LLVM is qualified when it has a clean test run without a front-end.  And it has
+no regressions when using either ``clang`` or ``dragonegg`` with the
+``test-suite`` from the previous release.
+
+Qualify Clang
+^^^^^^^^^^^^^
+
+``Clang`` is qualified when front-end specific tests in the ``llvm`` dejagnu
+test suite all pass, clang's own test suite passes cleanly, and there are no
+regressions in the ``test-suite``.
+
+Specific Target Qualification Details
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++--------------+-------------+----------------+-----------------------------+
+| Architecture | OS          | clang baseline | tests                       |
++==============+=============+================+=============================+
+| x86-32       | Linux       | last release   | llvm dejagnu,               |
+|              |             |                | clang tests,                |
+|              |             |                | test-suite (including spec) |
++--------------+-------------+----------------+-----------------------------+
+| x86-32       | FreeBSD     | last release   | llvm dejagnu,               |
+|              |             |                | clang tests,                |
+|              |             |                | test-suite                  |
++--------------+-------------+----------------+-----------------------------+
+| x86-32       | mingw       | none           | QT                          |
++--------------+-------------+----------------+-----------------------------+
+| x86-64       | Mac OS 10.X | last release   | llvm dejagnu,               |
+|              |             |                | clang tests,                |
+|              |             |                | test-suite (including spec) |
++--------------+-------------+----------------+-----------------------------+
+| x86-64       | Linux       | last release   | llvm dejagnu,               |
+|              |             |                | clang tests,                |
+|              |             |                | test-suite (including spec) |
++--------------+-------------+----------------+-----------------------------+
+| x86-64       | FreeBSD     | last release   | llvm dejagnu,               |
+|              |             |                | clang tests,                |
+|              |             |                | test-suite                  |
++--------------+-------------+----------------+-----------------------------+
+
+Community Testing
+-----------------
+
+Once all testing has been completed and appropriate bugs filed, the release
+candidate tarballs are put on the website and the LLVM community is notified.
+Ask that all LLVM developers test the release in 2 ways:
+
+#. Download ``llvm-X.Y``, ``llvm-test-X.Y``, and the appropriate ``clang``
+   binary.  Build LLVM.  Run ``make check`` and the full LLVM test suite (``make
+   TEST=nightly report``).
+
+#. Download ``llvm-X.Y``, ``llvm-test-X.Y``, and the ``clang`` sources.  Compile
+   everything.  Run ``make check`` and the full LLVM test suite (``make
+   TEST=nightly report``).
+
+Ask LLVM developers to submit the test suite report and ``make check`` results
+to the list.  Verify that there are no regressions from the previous release.
+The results are not used to qualify a release, but to spot other potential
+problems.  For unsupported targets, verify that ``make check`` is at least
+clean.
+
+During the first round of testing, all regressions must be fixed before the
+second release candidate is tagged.
+
+If this is the second round of testing, the testing is only to ensure that bug
+fixes previously merged in have not created new major problems. *This is not
+the time to solve additional and unrelated bugs!* If no patches are merged in,
+the release is determined to be ready and the release manager may move onto the
+next stage.
+
+Release Patch Rules
+-------------------
+
+Below are the rules regarding patching the release branch:
+
+#. Patches applied to the release branch may only be applied by the release
+   manager.
+
+#. During the first round of testing, patches that fix regressions or that are
+   small and relatively risk free (verified by the appropriate code owner) are
+   applied to the branch.  Code owners are asked to be very conservative in
+   approving patches for the branch.  We reserve the right to reject any patch
+   that does not fix a regression as previously defined.
+
+#. During the remaining rounds of testing, only patches that fix critical
+   regressions may be applied.
+
+Release Final Tasks
+-------------------
+
+The final stages of the release process involves tagging the "final" release
+branch, updating documentation that refers to the release, and updating the
+demo page.
+
+Update Documentation
+^^^^^^^^^^^^^^^^^^^^
+
+Review the documentation and ensure that it is up to date.  The "Release Notes"
+must be updated to reflect new features, bug fixes, new known issues, and
+changes in the list of supported platforms.  The "Getting Started Guide" should
+be updated to reflect the new release version number tag available from
+Subversion and changes in basic system requirements.  Merge both changes from
+mainline into the release branch.
+
+.. _tag:
+
+Tag the LLVM Final Release
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Tag the final release sources using the following procedure:
+
+::
+
+  $ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
+             https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/Final
+
+  $ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
+             https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/Final
+
+  $ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \
+             https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XY/Final
+
+  $ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
+             https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/Final
+
+Update the LLVM Demo Page
+-------------------------
+
+The LLVM demo page must be updated to use the new release.  This consists of
+using the new ``clang`` binary and building LLVM.
+
+Update the LLVM Website
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The website must be updated before the release announcement is sent out.  Here
+is what to do:
+
+#. Check out the ``www`` module from Subversion.
+
+#. Create a new subdirectory ``X.Y`` in the releases directory.
+
+#. Commit the ``llvm``, ``test-suite``, ``clang`` source, ``clang binaries``,
+   ``dragonegg`` source, and ``dragonegg`` binaries in this new directory.
+
+#. Copy and commit the ``llvm/docs`` and ``LICENSE.txt`` files into this new
+   directory.  The docs should be built with ``BUILD_FOR_WEBSITE=1``.
+
+#. Commit the ``index.html`` to the ``release/X.Y`` directory to redirect (use
+   from previous release).
+
+#. Update the ``releases/download.html`` file with the new release.
+
+#. Update the ``releases/index.html`` with the new release and link to release
+   documentation.
+
+#. Finally, update the main page (``index.html`` and sidebar) to point to the
+   new release and release announcement.  Make sure this all gets committed back
+   into Subversion.
+
+Announce the Release
+^^^^^^^^^^^^^^^^^^^^
+
+Have Chris send out the release announcement when everything is finished.
+
diff --git a/docs/LangRef.html b/docs/LangRef.html
deleted file mode 100644
index 35ad94d990..0000000000
--- a/docs/LangRef.html
+++ /dev/null
@@ -1,9035 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
-  <title>LLVM Assembly Language Reference Manual</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <meta name="description"
-  content="LLVM Assembly Language Reference Manual.">
-  <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>LLVM Language Reference Manual</h1>
-<ol>
-  <li><a href="#abstract">Abstract</a></li>
-  <li><a href="#introduction">Introduction</a></li>
-  <li><a href="#identifiers">Identifiers</a></li>
-  <li><a href="#highlevel">High Level Structure</a>
-    <ol>
-      <li><a href="#modulestructure">Module Structure</a></li>
-      <li><a href="#linkage">Linkage Types</a>
-        <ol>
-          <li><a href="#linkage_private">'<tt>private</tt>' Linkage</a></li>
-          <li><a href="#linkage_linker_private">'<tt>linker_private</tt>' Linkage</a></li>
-          <li><a href="#linkage_linker_private_weak">'<tt>linker_private_weak</tt>' Linkage</a></li>
-          <li><a href="#linkage_internal">'<tt>internal</tt>' Linkage</a></li>
-          <li><a href="#linkage_available_externally">'<tt>available_externally</tt>' Linkage</a></li>
-          <li><a href="#linkage_linkonce">'<tt>linkonce</tt>' Linkage</a></li>
-          <li><a href="#linkage_common">'<tt>common</tt>' Linkage</a></li>
-          <li><a href="#linkage_weak">'<tt>weak</tt>' Linkage</a></li>
-          <li><a href="#linkage_appending">'<tt>appending</tt>' Linkage</a></li>
-          <li><a href="#linkage_externweak">'<tt>extern_weak</tt>' Linkage</a></li>
-          <li><a href="#linkage_linkonce_odr">'<tt>linkonce_odr</tt>' Linkage</a></li>
-          <li><a href="#linkage_linkonce_odr_auto_hide">'<tt>linkonce_odr_auto_hide</tt>' Linkage</a></li>
-          <li><a href="#linkage_weak">'<tt>weak_odr</tt>' Linkage</a></li>
-          <li><a href="#linkage_external">'<tt>external</tt>' Linkage</a></li>
-          <li><a href="#linkage_dllimport">'<tt>dllimport</tt>' Linkage</a></li>
-          <li><a href="#linkage_dllexport">'<tt>dllexport</tt>' Linkage</a></li>
-        </ol>
-      </li>
-      <li><a href="#callingconv">Calling Conventions</a></li>
-      <li><a href="#namedtypes">Named Types</a></li>
-      <li><a href="#globalvars">Global Variables</a></li>
-      <li><a href="#functionstructure">Functions</a></li>
-      <li><a href="#aliasstructure">Aliases</a></li>
-      <li><a href="#namedmetadatastructure">Named Metadata</a></li>
-      <li><a href="#paramattrs">Parameter Attributes</a></li>
-      <li><a href="#fnattrs">Function Attributes</a></li>
-      <li><a href="#gc">Garbage Collector Names</a></li>
-      <li><a href="#moduleasm">Module-Level Inline Assembly</a></li>
-      <li><a href="#datalayout">Data Layout</a></li>
-      <li><a href="#pointeraliasing">Pointer Aliasing Rules</a></li>
-      <li><a href="#volatile">Volatile Memory Accesses</a></li>
-      <li><a href="#memmodel">Memory Model for Concurrent Operations</a></li>
-      <li><a href="#ordering">Atomic Memory Ordering Constraints</a></li>
-    </ol>
-  </li>
-  <li><a href="#typesystem">Type System</a>
-    <ol>
-      <li><a href="#t_classifications">Type Classifications</a></li>
-      <li><a href="#t_primitive">Primitive Types</a>
-        <ol>
-          <li><a href="#t_integer">Integer Type</a></li>
-          <li><a href="#t_floating">Floating Point Types</a></li>
-          <li><a href="#t_x86mmx">X86mmx Type</a></li>
-          <li><a href="#t_void">Void Type</a></li>
-          <li><a href="#t_label">Label Type</a></li>
-          <li><a href="#t_metadata">Metadata Type</a></li>
-        </ol>
-      </li>
-      <li><a href="#t_derived">Derived Types</a>
-        <ol>
-          <li><a href="#t_aggregate">Aggregate Types</a>
-            <ol>
-              <li><a href="#t_array">Array Type</a></li>
-              <li><a href="#t_struct">Structure Type</a></li>
-              <li><a href="#t_opaque">Opaque Structure Types</a></li>
-              <li><a href="#t_vector">Vector Type</a></li>
-            </ol>
-          </li>
-          <li><a href="#t_function">Function Type</a></li>
-          <li><a href="#t_pointer">Pointer Type</a></li>
-        </ol>
-      </li>
-    </ol>
-  </li>
-  <li><a href="#constants">Constants</a>
-    <ol>
-      <li><a href="#simpleconstants">Simple Constants</a></li>
-      <li><a href="#complexconstants">Complex Constants</a></li>
-      <li><a href="#globalconstants">Global Variable and Function Addresses</a></li>
-      <li><a href="#undefvalues">Undefined Values</a></li>
-      <li><a href="#poisonvalues">Poison Values</a></li>
-      <li><a href="#blockaddress">Addresses of Basic Blocks</a></li>
-      <li><a href="#constantexprs">Constant Expressions</a></li>
-    </ol>
-  </li>
-  <li><a href="#othervalues">Other Values</a>
-    <ol>
-      <li><a href="#inlineasm">Inline Assembler Expressions</a></li>
-      <li><a href="#metadata">Metadata Nodes and Metadata Strings</a>
-        <ol>
-          <li><a href="#tbaa">'<tt>tbaa</tt>' Metadata</a></li>
-          <li><a href="#tbaa.struct">'<tt>tbaa.struct</tt>' Metadata</a></li>
-          <li><a href="#fpmath">'<tt>fpmath</tt>' Metadata</a></li>
-          <li><a href="#range">'<tt>range</tt>' Metadata</a></li>
-        </ol>
-      </li>
-    </ol>
-  </li>
-  <li><a href="#module_flags">Module Flags Metadata</a>
-    <ol>
-      <li><a href="#objc_gc_flags">Objective-C Garbage Collection Module Flags Metadata</a></li>
-    </ol>
-  </li>
-  <li><a href="#intrinsic_globals">Intrinsic Global Variables</a>
-    <ol>
-      <li><a href="#intg_used">The '<tt>llvm.used</tt>' Global Variable</a></li>
-      <li><a href="#intg_compiler_used">The '<tt>llvm.compiler.used</tt>'
-          Global Variable</a></li>
-      <li><a href="#intg_global_ctors">The '<tt>llvm.global_ctors</tt>'
-         Global Variable</a></li>
-      <li><a href="#intg_global_dtors">The '<tt>llvm.global_dtors</tt>'
-         Global Variable</a></li>
-    </ol>
-  </li>
-  <li><a href="#instref">Instruction Reference</a>
-    <ol>
-      <li><a href="#terminators">Terminator Instructions</a>
-        <ol>
-          <li><a href="#i_ret">'<tt>ret</tt>' Instruction</a></li>
-          <li><a href="#i_br">'<tt>br</tt>' Instruction</a></li>
-          <li><a href="#i_switch">'<tt>switch</tt>' Instruction</a></li>
-          <li><a href="#i_indirectbr">'<tt>indirectbr</tt>' Instruction</a></li>
-          <li><a href="#i_invoke">'<tt>invoke</tt>' Instruction</a></li>
-          <li><a href="#i_resume">'<tt>resume</tt>'  Instruction</a></li>
-          <li><a href="#i_unreachable">'<tt>unreachable</tt>' Instruction</a></li>
-        </ol>
-      </li>
-      <li><a href="#binaryops">Binary Operations</a>
-        <ol>
-          <li><a href="#i_add">'<tt>add</tt>' Instruction</a></li>
-          <li><a href="#i_fadd">'<tt>fadd</tt>' Instruction</a></li>
-          <li><a href="#i_sub">'<tt>sub</tt>' Instruction</a></li>
-          <li><a href="#i_fsub">'<tt>fsub</tt>' Instruction</a></li>
-          <li><a href="#i_mul">'<tt>mul</tt>' Instruction</a></li>
-          <li><a href="#i_fmul">'<tt>fmul</tt>' Instruction</a></li>
-          <li><a href="#i_udiv">'<tt>udiv</tt>' Instruction</a></li>
-          <li><a href="#i_sdiv">'<tt>sdiv</tt>' Instruction</a></li>
-          <li><a href="#i_fdiv">'<tt>fdiv</tt>' Instruction</a></li>
-          <li><a href="#i_urem">'<tt>urem</tt>' Instruction</a></li>
-          <li><a href="#i_srem">'<tt>srem</tt>' Instruction</a></li>
-          <li><a href="#i_frem">'<tt>frem</tt>' Instruction</a></li>
-        </ol>
-      </li>
-      <li><a href="#bitwiseops">Bitwise Binary Operations</a>
-        <ol>
-          <li><a href="#i_shl">'<tt>shl</tt>' Instruction</a></li>
-          <li><a href="#i_lshr">'<tt>lshr</tt>' Instruction</a></li>
-          <li><a href="#i_ashr">'<tt>ashr</tt>' Instruction</a></li>
-          <li><a href="#i_and">'<tt>and</tt>' Instruction</a></li>
-          <li><a href="#i_or">'<tt>or</tt>'  Instruction</a></li>
-          <li><a href="#i_xor">'<tt>xor</tt>' Instruction</a></li>
-        </ol>
-      </li>
-      <li><a href="#vectorops">Vector Operations</a>
-        <ol>
-          <li><a href="#i_extractelement">'<tt>extractelement</tt>' Instruction</a></li>
-          <li><a href="#i_insertelement">'<tt>insertelement</tt>' Instruction</a></li>
-          <li><a href="#i_shufflevector">'<tt>shufflevector</tt>' Instruction</a></li>
-        </ol>
-      </li>
-      <li><a href="#aggregateops">Aggregate Operations</a>
-        <ol>
-          <li><a href="#i_extractvalue">'<tt>extractvalue</tt>' Instruction</a></li>
-          <li><a href="#i_insertvalue">'<tt>insertvalue</tt>' Instruction</a></li>
-        </ol>
-      </li>
-      <li><a href="#memoryops">Memory Access and Addressing Operations</a>
-        <ol>
-          <li><a href="#i_alloca">'<tt>alloca</tt>' Instruction</a></li>
-         <li><a href="#i_load">'<tt>load</tt>' Instruction</a></li>
-         <li><a href="#i_store">'<tt>store</tt>' Instruction</a></li>
-         <li><a href="#i_fence">'<tt>fence</tt>' Instruction</a></li>
-         <li><a href="#i_cmpxchg">'<tt>cmpxchg</tt>' Instruction</a></li>
-         <li><a href="#i_atomicrmw">'<tt>atomicrmw</tt>' Instruction</a></li>
-         <li><a href="#i_getelementptr">'<tt>getelementptr</tt>' Instruction</a></li>
-        </ol>
-      </li>
-      <li><a href="#convertops">Conversion Operations</a>
-        <ol>
-          <li><a href="#i_trunc">'<tt>trunc .. to</tt>' Instruction</a></li>
-          <li><a href="#i_zext">'<tt>zext .. to</tt>' Instruction</a></li>
-          <li><a href="#i_sext">'<tt>sext .. to</tt>' Instruction</a></li>
-          <li><a href="#i_fptrunc">'<tt>fptrunc .. to</tt>' Instruction</a></li>
-          <li><a href="#i_fpext">'<tt>fpext .. to</tt>' Instruction</a></li>
-          <li><a href="#i_fptoui">'<tt>fptoui .. to</tt>' Instruction</a></li>
-          <li><a href="#i_fptosi">'<tt>fptosi .. to</tt>' Instruction</a></li>
-          <li><a href="#i_uitofp">'<tt>uitofp .. to</tt>' Instruction</a></li>
-          <li><a href="#i_sitofp">'<tt>sitofp .. to</tt>' Instruction</a></li>
-          <li><a href="#i_ptrtoint">'<tt>ptrtoint .. to</tt>' Instruction</a></li>
-          <li><a href="#i_inttoptr">'<tt>inttoptr .. to</tt>' Instruction</a></li>
-          <li><a href="#i_bitcast">'<tt>bitcast .. to</tt>' Instruction</a></li>
-        </ol>
-      </li>
-      <li><a href="#otherops">Other Operations</a>
-        <ol>
-          <li><a href="#i_icmp">'<tt>icmp</tt>' Instruction</a></li>
-          <li><a href="#i_fcmp">'<tt>fcmp</tt>' Instruction</a></li>
-          <li><a href="#i_phi">'<tt>phi</tt>'   Instruction</a></li>
-          <li><a href="#i_select">'<tt>select</tt>' Instruction</a></li>
-          <li><a href="#i_call">'<tt>call</tt>'  Instruction</a></li>
-          <li><a href="#i_va_arg">'<tt>va_arg</tt>'  Instruction</a></li>
-          <li><a href="#i_landingpad">'<tt>landingpad</tt>' Instruction</a></li>
-        </ol>
-      </li>
-    </ol>
-  </li>
-  <li><a href="#intrinsics">Intrinsic Functions</a>
-    <ol>
-      <li><a href="#int_varargs">Variable Argument Handling Intrinsics</a>
-        <ol>
-          <li><a href="#int_va_start">'<tt>llvm.va_start</tt>' Intrinsic</a></li>
-          <li><a href="#int_va_end">'<tt>llvm.va_end</tt>'   Intrinsic</a></li>
-          <li><a href="#int_va_copy">'<tt>llvm.va_copy</tt>'  Intrinsic</a></li>
-        </ol>
-      </li>
-      <li><a href="#int_gc">Accurate Garbage Collection Intrinsics</a>
-        <ol>
-          <li><a href="#int_gcroot">'<tt>llvm.gcroot</tt>' Intrinsic</a></li>
-          <li><a href="#int_gcread">'<tt>llvm.gcread</tt>' Intrinsic</a></li>
-          <li><a href="#int_gcwrite">'<tt>llvm.gcwrite</tt>' Intrinsic</a></li>
-        </ol>
-      </li>
-      <li><a href="#int_codegen">Code Generator Intrinsics</a>
-        <ol>
-          <li><a href="#int_returnaddress">'<tt>llvm.returnaddress</tt>' Intrinsic</a></li>
-          <li><a href="#int_frameaddress">'<tt>llvm.frameaddress</tt>'   Intrinsic</a></li>
-          <li><a href="#int_stacksave">'<tt>llvm.stacksave</tt>' Intrinsic</a></li>
-          <li><a href="#int_stackrestore">'<tt>llvm.stackrestore</tt>' Intrinsic</a></li>
-          <li><a href="#int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a></li>
-          <li><a href="#int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a></li>
-          <li><a href="#int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a></li>
-        </ol>
-      </li>
-      <li><a href="#int_libc">Standard C Library Intrinsics</a>
-        <ol>
-          <li><a href="#int_memcpy">'<tt>llvm.memcpy.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_memmove">'<tt>llvm.memmove.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_memset">'<tt>llvm.memset.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_sqrt">'<tt>llvm.sqrt.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_powi">'<tt>llvm.powi.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_sin">'<tt>llvm.sin.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_cos">'<tt>llvm.cos.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_pow">'<tt>llvm.pow.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_exp">'<tt>llvm.exp.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_exp2">'<tt>llvm.exp2.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_log">'<tt>llvm.log.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_log10">'<tt>llvm.log10.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_log2">'<tt>llvm.log2.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_fabs">'<tt>llvm.fabs.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_floor">'<tt>llvm.floor.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_ceil">'<tt>llvm.ceil.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_trunc">'<tt>llvm.trunc.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_rint">'<tt>llvm.rint.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_nearbyint">'<tt>llvm.nearbyint.*</tt>' Intrinsic</a></li>
-        </ol>
-      </li>
-      <li><a href="#int_manip">Bit Manipulation Intrinsics</a>
-        <ol>
-          <li><a href="#int_bswap">'<tt>llvm.bswap.*</tt>' Intrinsics</a></li>
-          <li><a href="#int_ctpop">'<tt>llvm.ctpop.*</tt>' Intrinsic </a></li>
-          <li><a href="#int_ctlz">'<tt>llvm.ctlz.*</tt>' Intrinsic </a></li>
-          <li><a href="#int_cttz">'<tt>llvm.cttz.*</tt>' Intrinsic </a></li>
-        </ol>
-      </li>
-      <li><a href="#int_overflow">Arithmetic with Overflow Intrinsics</a>
-        <ol>
-          <li><a href="#int_sadd_overflow">'<tt>llvm.sadd.with.overflow.*</tt> Intrinsics</a></li>
-          <li><a href="#int_uadd_overflow">'<tt>llvm.uadd.with.overflow.*</tt> Intrinsics</a></li>
-          <li><a href="#int_ssub_overflow">'<tt>llvm.ssub.with.overflow.*</tt> Intrinsics</a></li>
-          <li><a href="#int_usub_overflow">'<tt>llvm.usub.with.overflow.*</tt> Intrinsics</a></li>
-          <li><a href="#int_smul_overflow">'<tt>llvm.smul.with.overflow.*</tt> Intrinsics</a></li>
-          <li><a href="#int_umul_overflow">'<tt>llvm.umul.with.overflow.*</tt> Intrinsics</a></li>
-        </ol>
-      </li>
-      <li><a href="#spec_arithmetic">Specialised Arithmetic Intrinsics</a>
-        <ol>
-          <li><a href="#fmuladd">'<tt>llvm.fmuladd</tt> Intrinsic</a></li>
-        </ol>
-      </li>
-      <li><a href="#int_fp16">Half Precision Floating Point Intrinsics</a>
-        <ol>
-          <li><a href="#int_convert_to_fp16">'<tt>llvm.convert.to.fp16</tt>' Intrinsic</a></li>
-          <li><a href="#int_convert_from_fp16">'<tt>llvm.convert.from.fp16</tt>' Intrinsic</a></li>
-        </ol>
-      </li>
-      <li><a href="#int_debugger">Debugger intrinsics</a></li>
-      <li><a href="#int_eh">Exception Handling intrinsics</a></li>
-      <li><a href="#int_trampoline">Trampoline Intrinsics</a>
-        <ol>
-          <li><a href="#int_it">'<tt>llvm.init.trampoline</tt>' Intrinsic</a></li>
-          <li><a href="#int_at">'<tt>llvm.adjust.trampoline</tt>' Intrinsic</a></li>
-        </ol>
-      </li>
-      <li><a href="#int_memorymarkers">Memory Use Markers</a>
-        <ol>
-          <li><a href="#int_lifetime_start">'<tt>llvm.lifetime.start</tt>' Intrinsic</a></li>
-          <li><a href="#int_lifetime_end">'<tt>llvm.lifetime.end</tt>' Intrinsic</a></li>
-          <li><a href="#int_invariant_start">'<tt>llvm.invariant.start</tt>' Intrinsic</a></li>
-          <li><a href="#int_invariant_end">'<tt>llvm.invariant.end</tt>' Intrinsic</a></li>
-        </ol>
-      </li>
-      <li><a href="#int_general">General intrinsics</a>
-        <ol>
-          <li><a href="#int_var_annotation">
-            '<tt>llvm.var.annotation</tt>' Intrinsic</a></li>
-          <li><a href="#int_annotation">
-            '<tt>llvm.annotation.*</tt>' Intrinsic</a></li>
-          <li><a href="#int_trap">
-            '<tt>llvm.trap</tt>' Intrinsic</a></li>
-          <li><a href="#int_debugtrap">
-            '<tt>llvm.debugtrap</tt>' Intrinsic</a></li>
-          <li><a href="#int_stackprotector">
-            '<tt>llvm.stackprotector</tt>' Intrinsic</a></li>
-          <li><a href="#int_objectsize">
-            '<tt>llvm.objectsize</tt>' Intrinsic</a></li>
-          <li><a href="#int_expect">
-            '<tt>llvm.expect</tt>' Intrinsic</a></li>
-          <li><a href="#int_donothing">
-            '<tt>llvm.donothing</tt>' Intrinsic</a></li>
-        </ol>
-      </li>
-    </ol>
-  </li>
-</ol>
-
-<div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
-            and <a href="mailto:vadve@cs.uiuc.edu">Vikram Adve</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="abstract">Abstract</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This document is a reference manual for the LLVM assembly language. LLVM is
-   a Static Single Assignment (SSA) based representation that provides type
-   safety, low-level operations, flexibility, and the capability of representing
-   'all' high-level languages cleanly.  It is the common code representation
-   used throughout all phases of the LLVM compilation strategy.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="introduction">Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The LLVM code representation is designed to be used in three different forms:
-   as an in-memory compiler IR, as an on-disk bitcode representation (suitable
-   for fast loading by a Just-In-Time compiler), and as a human readable
-   assembly language representation.  This allows LLVM to provide a powerful
-   intermediate representation for efficient compiler transformations and
-   analysis, while providing a natural means to debug and visualize the
-   transformations.  The three different forms of LLVM are all equivalent.  This
-   document describes the human readable representation and notation.</p>
-
-<p>The LLVM representation aims to be light-weight and low-level while being
-   expressive, typed, and extensible at the same time.  It aims to be a
-   "universal IR" of sorts, by being at a low enough level that high-level ideas
-   may be cleanly mapped to it (similar to how microprocessors are "universal
-   IR's", allowing many source languages to be mapped to them).  By providing
-   type information, LLVM can be used as the target of optimizations: for
-   example, through pointer analysis, it can be proven that a C automatic
-   variable is never accessed outside of the current function, allowing it to
-   be promoted to a simple SSA value instead of a memory location.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="wellformed">Well-Formedness</a>
-</h4>
-
-<div>
-
-<p>It is important to note that this document describes 'well formed' LLVM
-   assembly language.  There is a difference between what the parser accepts and
-   what is considered 'well formed'.  For example, the following instruction is
-   syntactically okay, but not well formed:</p>
-
-<pre class="doc_code">
-%x = <a href="#i_add">add</a> i32 1, %x
-</pre>
-
-<p>because the definition of <tt>%x</tt> does not dominate all of its uses. The
-   LLVM infrastructure provides a verification pass that may be used to verify
-   that an LLVM module is well formed.  This pass is automatically run by the
-   parser after parsing input assembly and by the optimizer before it outputs
-   bitcode.  The violations pointed out by the verifier pass indicate bugs in
-   transformation passes or input to the parser.</p>
-
-</div>
-
-</div>
-
-<!-- Describe the typesetting conventions here. -->
-
-<!-- *********************************************************************** -->
-<h2><a name="identifiers">Identifiers</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM identifiers come in two basic types: global and local. Global
-   identifiers (functions, global variables) begin with the <tt>'@'</tt>
-   character. Local identifiers (register names, types) begin with
-   the <tt>'%'</tt> character. Additionally, there are three different formats
-   for identifiers, for different purposes:</p>
-
-<ol>
-  <li>Named values are represented as a string of characters with their prefix.
-      For example, <tt>%foo</tt>, <tt>@DivisionByZero</tt>,
-      <tt>%a.really.long.identifier</tt>. The actual regular expression used is
-      '<tt>[%@][a-zA-Z$._][a-zA-Z$._0-9]*</tt>'.  Identifiers which require
-      other characters in their names can be surrounded with quotes. Special
-      characters may be escaped using <tt>"\xx"</tt> where <tt>xx</tt> is the
-      ASCII code for the character in hexadecimal.  In this way, any character
-      can be used in a name value, even quotes themselves.</li>
-
-  <li>Unnamed values are represented as an unsigned numeric value with their
-      prefix.  For example, <tt>%12</tt>, <tt>@2</tt>, <tt>%44</tt>.</li>
-
-  <li>Constants, which are described in a <a href="#constants">section about
-      constants</a>, below.</li>
-</ol>
-
-<p>LLVM requires that values start with a prefix for two reasons: Compilers
-   don't need to worry about name clashes with reserved words, and the set of
-   reserved words may be expanded in the future without penalty.  Additionally,
-   unnamed identifiers allow a compiler to quickly come up with a temporary
-   variable without having to avoid symbol table conflicts.</p>
-
-<p>Reserved words in LLVM are very similar to reserved words in other
-   languages. There are keywords for different opcodes
-   ('<tt><a href="#i_add">add</a></tt>',
-   '<tt><a href="#i_bitcast">bitcast</a></tt>',
-   '<tt><a href="#i_ret">ret</a></tt>', etc...), for primitive type names
-   ('<tt><a href="#t_void">void</a></tt>',
-   '<tt><a href="#t_primitive">i32</a></tt>', etc...), and others.  These
-   reserved words cannot conflict with variable names, because none of them
-   start with a prefix character (<tt>'%'</tt> or <tt>'@'</tt>).</p>
-
-<p>Here is an example of LLVM code to multiply the integer variable
-   '<tt>%X</tt>' by 8:</p>
-
-<p>The easy way:</p>
-
-<pre class="doc_code">
-%result = <a href="#i_mul">mul</a> i32 %X, 8
-</pre>
-
-<p>After strength reduction:</p>
-
-<pre class="doc_code">
-%result = <a href="#i_shl">shl</a> i32 %X, i8 3
-</pre>
-
-<p>And the hard way:</p>
-
-<pre class="doc_code">
-%0 = <a href="#i_add">add</a> i32 %X, %X           <i>; yields {i32}:%0</i>
-%1 = <a href="#i_add">add</a> i32 %0, %0           <i>; yields {i32}:%1</i>
-%result = <a href="#i_add">add</a> i32 %1, %1
-</pre>
-
-<p>This last way of multiplying <tt>%X</tt> by 8 illustrates several important
-   lexical features of LLVM:</p>
-
-<ol>
-  <li>Comments are delimited with a '<tt>;</tt>' and go until the end of
-      line.</li>
-
-  <li>Unnamed temporaries are created when the result of a computation is not
-      assigned to a named value.</li>
-
-  <li>Unnamed temporaries are numbered sequentially</li>
-</ol>
-
-<p>It also shows a convention that we follow in this document.  When
-   demonstrating instructions, we will follow an instruction with a comment that
-   defines the type and name of value produced.  Comments are shown in italic
-   text.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="highlevel">High Level Structure</a></h2>
-<!-- *********************************************************************** -->
-<div>
-<!-- ======================================================================= -->
-<h3>
-  <a name="modulestructure">Module Structure</a>
-</h3>
-
-<div>
-
-<p>LLVM programs are composed of <tt>Module</tt>s, each of which is a
-   translation unit of the input programs.  Each module consists of functions,
-   global variables, and symbol table entries.  Modules may be combined together
-   with the LLVM linker, which merges function (and global variable)
-   definitions, resolves forward declarations, and merges symbol table
-   entries. Here is an example of the "hello world" module:</p>
-
-<pre class="doc_code">
-<i>; Declare the string constant as a global constant.</i>&nbsp;
-<a href="#identifiers">@.str</a> = <a href="#linkage_private">private</a>&nbsp;<a href="#globalvars">unnamed_addr</a>&nbsp;<a href="#globalvars">constant</a>&nbsp;<a href="#t_array">[13 x i8]</a> c"hello world\0A\00"&nbsp;
-
-<i>; External declaration of the puts function</i>&nbsp;
-<a href="#functionstructure">declare</a> i32 @puts(i8* <a href="#nocapture">nocapture</a>) <a href="#fnattrs">nounwind</a>&nbsp;
-
-<i>; Definition of main function</i>
-define i32 @main() {   <i>; i32()* </i>&nbsp;
-  <i>; Convert [13 x i8]* to i8  *...</i>&nbsp;
-  %cast210 = <a href="#i_getelementptr">getelementptr</a> [13 x i8]* @.str, i64 0, i64 0
-
-  <i>; Call puts function to write out the string to stdout.</i>&nbsp;
-  <a href="#i_call">call</a> i32 @puts(i8* %cast210)
-  <a href="#i_ret">ret</a> i32 0&nbsp;
-}
-
-<i>; Named metadata</i>
-!1 = metadata !{i32 42}
-!foo = !{!1, null}
-</pre>
-
-<p>This example is made up of a <a href="#globalvars">global variable</a> named
-   "<tt>.str</tt>", an external declaration of the "<tt>puts</tt>" function,
-   a <a href="#functionstructure">function definition</a> for
-   "<tt>main</tt>" and <a href="#namedmetadatastructure">named metadata</a>
-   "<tt>foo</tt>".</p>
-
-<p>In general, a module is made up of a list of global values (where both
-   functions and global variables are global values). Global values are
-   represented by a pointer to a memory location (in this case, a pointer to an
-   array of char, and a pointer to a function), and have one of the
-   following <a href="#linkage">linkage types</a>.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="linkage">Linkage Types</a>
-</h3>
-
-<div>
-
-<p>All Global Variables and Functions have one of the following types of
-   linkage:</p>
-
-<dl>
-  <dt><tt><b><a name="linkage_private">private</a></b></tt></dt>
-  <dd>Global values with "<tt>private</tt>" linkage are only directly accessible
-      by objects in the current module. In particular, linking code into a
-      module with an private global value may cause the private to be renamed as
-      necessary to avoid collisions.  Because the symbol is private to the
-      module, all references can be updated. This doesn't show up in any symbol
-      table in the object file.</dd>
-
-  <dt><tt><b><a name="linkage_linker_private">linker_private</a></b></tt></dt>
-  <dd>Similar to <tt>private</tt>, but the symbol is passed through the
-      assembler and evaluated by the linker. Unlike normal strong symbols, they
-      are removed by the linker from the final linked image (executable or
-      dynamic library).</dd>
-
-  <dt><tt><b><a name="linkage_linker_private_weak">linker_private_weak</a></b></tt></dt>
-  <dd>Similar to "<tt>linker_private</tt>", but the symbol is weak. Note that
-      <tt>linker_private_weak</tt> symbols are subject to coalescing by the
-      linker. The symbols are removed by the linker from the final linked image
-      (executable or dynamic library).</dd>
-
-  <dt><tt><b><a name="linkage_internal">internal</a></b></tt></dt>
-  <dd>Similar to private, but the value shows as a local symbol
-      (<tt>STB_LOCAL</tt> in the case of ELF) in the object file. This
-      corresponds to the notion of the '<tt>static</tt>' keyword in C.</dd>
-
-  <dt><tt><b><a name="linkage_available_externally">available_externally</a></b></tt></dt>
-  <dd>Globals with "<tt>available_externally</tt>" linkage are never emitted
-      into the object file corresponding to the LLVM module.  They exist to
-      allow inlining and other optimizations to take place given knowledge of
-      the definition of the global, which is known to be somewhere outside the
-      module.  Globals with <tt>available_externally</tt> linkage are allowed to
-      be discarded at will, and are otherwise the same as <tt>linkonce_odr</tt>.
-      This linkage type is only allowed on definitions, not declarations.</dd>
-
-  <dt><tt><b><a name="linkage_linkonce">linkonce</a></b></tt></dt>
-  <dd>Globals with "<tt>linkonce</tt>" linkage are merged with other globals of
-      the same name when linkage occurs.  This can be used to implement
-      some forms of inline functions, templates, or other code which must be
-      generated in each translation unit that uses it, but where the body may
-      be overridden with a more definitive definition later.  Unreferenced
-      <tt>linkonce</tt> globals are allowed to be discarded.  Note that
-      <tt>linkonce</tt> linkage does not actually allow the optimizer to
-      inline the body of this function into callers because it doesn't know if
-      this definition of the function is the definitive definition within the
-      program or whether it will be overridden by a stronger definition.
-      To enable inlining and other optimizations, use "<tt>linkonce_odr</tt>"
-      linkage.</dd>
-
-  <dt><tt><b><a name="linkage_weak">weak</a></b></tt></dt>
-  <dd>"<tt>weak</tt>" linkage has the same merging semantics as
-      <tt>linkonce</tt> linkage, except that unreferenced globals with
-      <tt>weak</tt> linkage may not be discarded.  This is used for globals that
-      are declared "weak" in C source code.</dd>
-
-  <dt><tt><b><a name="linkage_common">common</a></b></tt></dt>
-  <dd>"<tt>common</tt>" linkage is most similar to "<tt>weak</tt>" linkage, but
-      they are used for tentative definitions in C, such as "<tt>int X;</tt>" at
-      global scope.
-      Symbols with "<tt>common</tt>" linkage are merged in the same way as
-      <tt>weak symbols</tt>, and they may not be deleted if unreferenced.
-      <tt>common</tt> symbols may not have an explicit section,
-      must have a zero initializer, and may not be marked '<a
-      href="#globalvars"><tt>constant</tt></a>'.  Functions and aliases may not
-      have common linkage.</dd>
-
-
-  <dt><tt><b><a name="linkage_appending">appending</a></b></tt></dt>
-  <dd>"<tt>appending</tt>" linkage may only be applied to global variables of
-      pointer to array type.  When two global variables with appending linkage
-      are linked together, the two global arrays are appended together.  This is
-      the LLVM, typesafe, equivalent of having the system linker append together
-      "sections" with identical names when .o files are linked.</dd>
-
-  <dt><tt><b><a name="linkage_externweak">extern_weak</a></b></tt></dt>
-  <dd>The semantics of this linkage follow the ELF object file model: the symbol
-      is weak until linked, if not linked, the symbol becomes null instead of
-      being an undefined reference.</dd>
-
-  <dt><tt><b><a name="linkage_linkonce_odr">linkonce_odr</a></b></tt></dt>
-  <dt><tt><b><a name="linkage_weak_odr">weak_odr</a></b></tt></dt>
-  <dd>Some languages allow differing globals to be merged, such as two functions
-      with different semantics.  Other languages, such as <tt>C++</tt>, ensure
-      that only equivalent globals are ever merged (the "one definition rule"
-      &mdash; "ODR").  Such languages can use the <tt>linkonce_odr</tt>
-      and <tt>weak_odr</tt> linkage types to indicate that the global will only
-      be merged with equivalent globals.  These linkage types are otherwise the
-      same as their non-<tt>odr</tt> versions.</dd>
-
-  <dt><tt><b><a name="linkage_linkonce_odr_auto_hide">linkonce_odr_auto_hide</a></b></tt></dt>
-  <dd>Similar to "<tt>linkonce_odr</tt>", but nothing in the translation unit
-      takes the address of this definition. For instance, functions that had an
-      inline definition, but the compiler decided not to inline it.
-      <tt>linkonce_odr_auto_hide</tt> may have only <tt>default</tt> visibility.
-      The symbols are removed by the linker from the final linked image
-      (executable or dynamic library).</dd>
-
-  <dt><tt><b><a name="linkage_external">external</a></b></tt></dt>
-  <dd>If none of the above identifiers are used, the global is externally
-      visible, meaning that it participates in linkage and can be used to
-      resolve external symbol references.</dd>
-</dl>
-
-<p>The next two types of linkage are targeted for Microsoft Windows platform
-   only. They are designed to support importing (exporting) symbols from (to)
-   DLLs (Dynamic Link Libraries).</p>
-
-<dl>
-  <dt><tt><b><a name="linkage_dllimport">dllimport</a></b></tt></dt>
-  <dd>"<tt>dllimport</tt>" linkage causes the compiler to reference a function
-      or variable via a global pointer to a pointer that is set up by the DLL
-      exporting the symbol. On Microsoft Windows targets, the pointer name is
-      formed by combining <code>__imp_</code> and the function or variable
-      name.</dd>
-
-  <dt><tt><b><a name="linkage_dllexport">dllexport</a></b></tt></dt>
-  <dd>"<tt>dllexport</tt>" linkage causes the compiler to provide a global
-      pointer to a pointer in a DLL, so that it can be referenced with the
-      <tt>dllimport</tt> attribute. On Microsoft Windows targets, the pointer
-      name is formed by combining <code>__imp_</code> and the function or
-      variable name.</dd>
-</dl>
-
-<p>For example, since the "<tt>.LC0</tt>" variable is defined to be internal, if
-   another module defined a "<tt>.LC0</tt>" variable and was linked with this
-   one, one of the two would be renamed, preventing a collision.  Since
-   "<tt>main</tt>" and "<tt>puts</tt>" are external (i.e., lacking any linkage
-   declarations), they are accessible outside of the current module.</p>
-
-<p>It is illegal for a function <i>declaration</i> to have any linkage type
-   other than <tt>external</tt>, <tt>dllimport</tt>
-  or <tt>extern_weak</tt>.</p>
-
-<p>Aliases can have only <tt>external</tt>, <tt>internal</tt>, <tt>weak</tt>
-   or <tt>weak_odr</tt> linkages.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="callingconv">Calling Conventions</a>
-</h3>
-
-<div>
-
-<p>LLVM <a href="#functionstructure">functions</a>, <a href="#i_call">calls</a>
-   and <a href="#i_invoke">invokes</a> can all have an optional calling
-   convention specified for the call.  The calling convention of any pair of
-   dynamic caller/callee must match, or the behavior of the program is
-   undefined.  The following calling conventions are supported by LLVM, and more
-   may be added in the future:</p>
-
-<dl>
-  <dt><b>"<tt>ccc</tt>" - The C calling convention</b>:</dt>
-  <dd>This calling convention (the default if no other calling convention is
-      specified) matches the target C calling conventions.  This calling
-      convention supports varargs function calls and tolerates some mismatch in
-      the declared prototype and implemented declaration of the function (as
-      does normal C).</dd>
-
-  <dt><b>"<tt>fastcc</tt>" - The fast calling convention</b>:</dt>
-  <dd>This calling convention attempts to make calls as fast as possible
-      (e.g. by passing things in registers).  This calling convention allows the
-      target to use whatever tricks it wants to produce fast code for the
-      target, without having to conform to an externally specified ABI
-      (Application Binary Interface).
-      <a href="CodeGenerator.html#id80">Tail calls can only be optimized
-      when this, the GHC or the HiPE convention is used.</a> This calling
-      convention does not support varargs and requires the prototype of all
-      callees to exactly match the prototype of the function definition.</dd>
-
-  <dt><b>"<tt>coldcc</tt>" - The cold calling convention</b>:</dt>
-  <dd>This calling convention attempts to make code in the caller as efficient
-      as possible under the assumption that the call is not commonly executed.
-      As such, these calls often preserve all registers so that the call does
-      not break any live ranges in the caller side.  This calling convention
-      does not support varargs and requires the prototype of all callees to
-      exactly match the prototype of the function definition.</dd>
-
-  <dt><b>"<tt>cc <em>10</em></tt>" - GHC convention</b>:</dt>
-  <dd>This calling convention has been implemented specifically for use by the
-      <a href="http://www.haskell.org/ghc">Glasgow Haskell Compiler (GHC)</a>.
-      It passes everything in registers, going to extremes to achieve this by
-      disabling callee save registers. This calling convention should not be
-      used lightly but only for specific situations such as an alternative to
-      the <em>register pinning</em> performance technique often used when
-      implementing functional programming languages. At the moment only X86
-      supports this convention and it has the following limitations:
-      <ul>
-        <li>On <em>X86-32</em> only supports up to 4 bit type parameters. No
-            floating point types are supported.</li>
-        <li>On <em>X86-64</em> only supports up to 10 bit type parameters and
-            6 floating point parameters.</li>
-      </ul>
-      This calling convention supports
-      <a href="CodeGenerator.html#id80">tail call optimization</a> but
-      requires both the caller and callee are using it.
-  </dd>
-
-  <dt><b>"<tt>cc <em>11</em></tt>" - The HiPE calling convention</b>:</dt>
-  <dd>This calling convention has been implemented specifically for use by the
-      <a href="http://www.it.uu.se/research/group/hipe/">High-Performance Erlang
-      (HiPE)</a> compiler, <em>the</em> native code compiler of the
-      <a href="http://www.erlang.org/download.shtml">Ericsson's Open Source
-      Erlang/OTP system</a>. It uses more registers for argument passing than
-      the ordinary C calling convention and defines no callee-saved registers.
-      The calling convention properly supports
-      <a href="CodeGenerator.html#id80">tail call optimization</a> but requires
-      that both the caller and the callee use it. It uses a <em>register
-      pinning</em> mechanism, similar to GHC's convention, for keeping
-      frequently accessed runtime components pinned to specific hardware
-      registers. At the moment only X86 supports this convention (both 32 and 64
-      bit).</dd>
-
-  <dt><b>"<tt>cc &lt;<em>n</em>&gt;</tt>" - Numbered convention</b>:</dt>
-  <dd>Any calling convention may be specified by number, allowing
-      target-specific calling conventions to be used.  Target specific calling
-      conventions start at 64.</dd>
-</dl>
-
-<p>More calling conventions can be added/defined on an as-needed basis, to
-   support Pascal conventions or any other well-known target-independent
-   convention.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="visibility">Visibility Styles</a>
-</h3>
-
-<div>
-
-<p>All Global Variables and Functions have one of the following visibility
-   styles:</p>
-
-<dl>
-  <dt><b>"<tt>default</tt>" - Default style</b>:</dt>
-  <dd>On targets that use the ELF object file format, default visibility means
-      that the declaration is visible to other modules and, in shared libraries,
-      means that the declared entity may be overridden. On Darwin, default
-      visibility means that the declaration is visible to other modules. Default
-      visibility corresponds to "external linkage" in the language.</dd>
-
-  <dt><b>"<tt>hidden</tt>" - Hidden style</b>:</dt>
-  <dd>Two declarations of an object with hidden visibility refer to the same
-      object if they are in the same shared object. Usually, hidden visibility
-      indicates that the symbol will not be placed into the dynamic symbol
-      table, so no other module (executable or shared library) can reference it
-      directly.</dd>
-
-  <dt><b>"<tt>protected</tt>" - Protected style</b>:</dt>
-  <dd>On ELF, protected visibility indicates that the symbol will be placed in
-      the dynamic symbol table, but that references within the defining module
-      will bind to the local symbol. That is, the symbol cannot be overridden by
-      another module.</dd>
-</dl>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="namedtypes">Named Types</a>
-</h3>
-
-<div>
-
-<p>LLVM IR allows you to specify name aliases for certain types.  This can make
-   it easier to read the IR and make the IR more condensed (particularly when
-   recursive types are involved).  An example of a name specification is:</p>
-
-<pre class="doc_code">
-%mytype = type { %mytype*, i32 }
-</pre>
-
-<p>You may give a name to any <a href="#typesystem">type</a> except
-   "<a href="#t_void">void</a>".  Type name aliases may be used anywhere a type
-   is expected with the syntax "%mytype".</p>
-
-<p>Note that type names are aliases for the structural type that they indicate,
-   and that you can therefore specify multiple names for the same type.  This
-   often leads to confusing behavior when dumping out a .ll file.  Since LLVM IR
-   uses structural typing, the name is not part of the type.  When printing out
-   LLVM IR, the printer will pick <em>one name</em> to render all types of a
-   particular shape.  This means that if you have code where two different
-   source types end up having the same LLVM type, that the dumper will sometimes
-   print the "wrong" or unexpected type.  This is an important design point and
-   isn't going to change.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="globalvars">Global Variables</a>
-</h3>
-
-<div>
-
-<p>Global variables define regions of memory allocated at compilation time
-   instead of run-time.  Global variables may optionally be initialized, may
-   have an explicit section to be placed in, and may have an optional explicit
-   alignment specified.</p>
-
-<p>A variable may be defined as <tt>thread_local</tt>, which
-   means that it will not be shared by threads (each thread will have a
-   separated copy of the variable).  Not all targets support thread-local
-   variables.  Optionally, a TLS model may be specified:</p>
-
-<dl>
-  <dt><b><tt>localdynamic</tt></b>:</dt>
-  <dd>For variables that are only used within the current shared library.</dd>
-
-  <dt><b><tt>initialexec</tt></b>:</dt>
-  <dd>For variables in modules that will not be loaded dynamically.</dd>
-
-  <dt><b><tt>localexec</tt></b>:</dt>
-  <dd>For variables defined in the executable and only used within it.</dd>
-</dl>
-
-<p>The models correspond to the ELF TLS models; see
-   <a href="http://people.redhat.com/drepper/tls.pdf">ELF
-   Handling For Thread-Local Storage</a> for more information on under which
-   circumstances the different models may be used.  The target may choose a
-   different TLS model if the specified model is not supported, or if a better
-   choice of model can be made.</p>
-
-<p>A variable may be defined as a global
-   "constant," which indicates that the contents of the variable
-   will <b>never</b> be modified (enabling better optimization, allowing the
-   global data to be placed in the read-only section of an executable, etc).
-   Note that variables that need runtime initialization cannot be marked
-   "constant" as there is a store to the variable.</p>
-
-<p>LLVM explicitly allows <em>declarations</em> of global variables to be marked
-   constant, even if the final definition of the global is not.  This capability
-   can be used to enable slightly better optimization of the program, but
-   requires the language definition to guarantee that optimizations based on the
-   'constantness' are valid for the translation units that do not include the
-   definition.</p>
-
-<p>As SSA values, global variables define pointer values that are in scope
-   (i.e. they dominate) all basic blocks in the program.  Global variables
-   always define a pointer to their "content" type because they describe a
-   region of memory, and all memory objects in LLVM are accessed through
-   pointers.</p>
-
-<p>Global variables can be marked with <tt>unnamed_addr</tt> which indicates
-  that the address is not significant, only the content. Constants marked
-  like this can be merged with other constants if they have the same
-  initializer. Note that a constant with significant address <em>can</em>
-  be merged with a <tt>unnamed_addr</tt> constant, the result being a
-  constant whose address is significant.</p>
-
-<p>A global variable may be declared to reside in a target-specific numbered
-   address space. For targets that support them, address spaces may affect how
-   optimizations are performed and/or what target instructions are used to
-   access the variable. The default address space is zero. The address space
-   qualifier must precede any other attributes.</p>
-
-<p>LLVM allows an explicit section to be specified for globals.  If the target
-   supports it, it will emit globals to the section specified.</p>
-
-<p>An explicit alignment may be specified for a global, which must be a power
-   of 2.  If not present, or if the alignment is set to zero, the alignment of
-   the global is set by the target to whatever it feels convenient.  If an
-   explicit alignment is specified, the global is forced to have exactly that
-   alignment.  Targets and optimizers are not allowed to over-align the global
-   if the global has an assigned section.  In this case, the extra alignment
-   could be observable: for example, code could assume that the globals are
-   densely packed in their section and try to iterate over them as an array,
-   alignment padding would break this iteration.</p>
-
-<p>For example, the following defines a global in a numbered address space with
-   an initializer, section, and alignment:</p>
-
-<pre class="doc_code">
-@G = addrspace(5) constant float 1.0, section "foo", align 4
-</pre>
-
-<p>The following example defines a thread-local global with
-   the <tt>initialexec</tt> TLS model:</p>
-
-<pre class="doc_code">
-@G = thread_local(initialexec) global i32 0, align 4
-</pre>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="functionstructure">Functions</a>
-</h3>
-
-<div>
-
-<p>LLVM function definitions consist of the "<tt>define</tt>" keyword, an
-   optional <a href="#linkage">linkage type</a>, an optional
-   <a href="#visibility">visibility style</a>, an optional
-   <a href="#callingconv">calling convention</a>,
-   an optional <tt>unnamed_addr</tt> attribute, a return type, an optional
-   <a href="#paramattrs">parameter attribute</a> for the return type, a function
-   name, a (possibly empty) argument list (each with optional
-   <a href="#paramattrs">parameter attributes</a>), optional
-   <a href="#fnattrs">function attributes</a>, an optional section, an optional
-   alignment, an optional <a href="#gc">garbage collector name</a>, an opening
-   curly brace, a list of basic blocks, and a closing curly brace.</p>
-
-<p>LLVM function declarations consist of the "<tt>declare</tt>" keyword, an
-   optional <a href="#linkage">linkage type</a>, an optional
-   <a href="#visibility">visibility style</a>, an optional
-   <a href="#callingconv">calling convention</a>,
-   an optional <tt>unnamed_addr</tt> attribute, a return type, an optional
-   <a href="#paramattrs">parameter attribute</a> for the return type, a function
-   name, a possibly empty list of arguments, an optional alignment, and an
-   optional <a href="#gc">garbage collector name</a>.</p>
-
-<p>A function definition contains a list of basic blocks, forming the CFG
-   (Control Flow Graph) for the function.  Each basic block may optionally start
-   with a label (giving the basic block a symbol table entry), contains a list
-   of instructions, and ends with a <a href="#terminators">terminator</a>
-   instruction (such as a branch or function return).</p>
-
-<p>The first basic block in a function is special in two ways: it is immediately
-   executed on entrance to the function, and it is not allowed to have
-   predecessor basic blocks (i.e. there can not be any branches to the entry
-   block of a function).  Because the block can have no predecessors, it also
-   cannot have any <a href="#i_phi">PHI nodes</a>.</p>
-
-<p>LLVM allows an explicit section to be specified for functions.  If the target
-   supports it, it will emit functions to the section specified.</p>
-
-<p>An explicit alignment may be specified for a function.  If not present, or if
-   the alignment is set to zero, the alignment of the function is set by the
-   target to whatever it feels convenient.  If an explicit alignment is
-   specified, the function is forced to have at least that much alignment.  All
-   alignments must be a power of 2.</p>
-
-<p>If the <tt>unnamed_addr</tt> attribute is given, the address is know to not
-   be significant and two identical functions can be merged.</p>
-
-<h5>Syntax:</h5>
-<pre class="doc_code">
-define [<a href="#linkage">linkage</a>] [<a href="#visibility">visibility</a>]
-       [<a href="#callingconv">cconv</a>] [<a href="#paramattrs">ret attrs</a>]
-       &lt;ResultType&gt; @&lt;FunctionName&gt; ([argument list])
-       [<a href="#fnattrs">fn Attrs</a>] [section "name"] [align N]
-       [<a href="#gc">gc</a>] { ... }
-</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="aliasstructure">Aliases</a>
-</h3>
-
-<div>
-
-<p>Aliases act as "second name" for the aliasee value (which can be either
-   function, global variable, another alias or bitcast of global value). Aliases
-   may have an optional <a href="#linkage">linkage type</a>, and an
-   optional <a href="#visibility">visibility style</a>.</p>
-
-<h5>Syntax:</h5>
-<pre class="doc_code">
-@&lt;Name&gt; = alias [Linkage] [Visibility] &lt;AliaseeTy&gt; @&lt;Aliasee&gt;
-</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="namedmetadatastructure">Named Metadata</a>
-</h3>
-
-<div>
-
-<p>Named metadata is a collection of metadata. <a href="#metadata">Metadata
-   nodes</a> (but not metadata strings) are the only valid operands for
-   a named metadata.</p>
-
-<h5>Syntax:</h5>
-<pre class="doc_code">
-; Some unnamed metadata nodes, which are referenced by the named metadata.
-!0 = metadata !{metadata !"zero"}
-!1 = metadata !{metadata !"one"}
-!2 = metadata !{metadata !"two"}
-; A named metadata.
-!name = !{!0, !1, !2}
-</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="paramattrs">Parameter Attributes</a>
-</h3>
-
-<div>
-
-<p>The return type and each parameter of a function type may have a set of
-   <i>parameter attributes</i> associated with them. Parameter attributes are
-   used to communicate additional information about the result or parameters of
-   a function. Parameter attributes are considered to be part of the function,
-   not of the function type, so functions with different parameter attributes
-   can have the same function type.</p>
-
-<p>Parameter attributes are simple keywords that follow the type specified. If
-   multiple parameter attributes are needed, they are space separated. For
-   example:</p>
-
-<pre class="doc_code">
-declare i32 @printf(i8* noalias nocapture, ...)
-declare i32 @atoi(i8 zeroext)
-declare signext i8 @returns_signed_char()
-</pre>
-
-<p>Note that any attributes for the function result (<tt>nounwind</tt>,
-   <tt>readonly</tt>) come immediately after the argument list.</p>
-
-<p>Currently, only the following parameter attributes are defined:</p>
-
-<dl>
-  <dt><tt><b>zeroext</b></tt></dt>
-  <dd>This indicates to the code generator that the parameter or return value
-      should be zero-extended to the extent required by the target's ABI (which
-      is usually 32-bits, but is 8-bits for a i1 on x86-64) by the caller (for a
-      parameter) or the callee (for a return value).</dd>
-
-  <dt><tt><b>signext</b></tt></dt>
-  <dd>This indicates to the code generator that the parameter or return value
-      should be sign-extended to the extent required by the target's ABI (which
-      is usually 32-bits) by the caller (for a parameter) or the callee (for a
-      return value).</dd>
-
-  <dt><tt><b>inreg</b></tt></dt>
-  <dd>This indicates that this parameter or return value should be treated in a
-      special target-dependent fashion during while emitting code for a function
-      call or return (usually, by putting it in a register as opposed to memory,
-      though some targets use it to distinguish between two different kinds of
-      registers).  Use of this attribute is target-specific.</dd>
-
-  <dt><tt><b><a name="byval">byval</a></b></tt></dt>
-  <dd><p>This indicates that the pointer parameter should really be passed by
-      value to the function.  The attribute implies that a hidden copy of the
-      pointee
-      is made between the caller and the callee, so the callee is unable to
-      modify the value in the caller.  This attribute is only valid on LLVM
-      pointer arguments.  It is generally used to pass structs and arrays by
-      value, but is also valid on pointers to scalars.  The copy is considered
-      to belong to the caller not the callee (for example,
-      <tt><a href="#readonly">readonly</a></tt> functions should not write to
-      <tt>byval</tt> parameters). This is not a valid attribute for return
-      values.</p>
-
-      <p>The byval attribute also supports specifying an alignment with
-      the align attribute.  It indicates the alignment of the stack slot to
-      form and the known alignment of the pointer specified to the call site. If
-      the alignment is not specified, then the code generator makes a
-      target-specific assumption.</p></dd>
-
-  <dt><tt><b><a name="sret">sret</a></b></tt></dt>
-  <dd>This indicates that the pointer parameter specifies the address of a
-      structure that is the return value of the function in the source program.
-      This pointer must be guaranteed by the caller to be valid: loads and
-      stores to the structure may be assumed by the callee to not to trap and
-      to be properly aligned.  This may only be applied to the first parameter.
-      This is not a valid attribute for return values. </dd>
-
-  <dt><tt><b><a name="noalias">noalias</a></b></tt></dt>
-  <dd>This indicates that pointer values
-      <a href="#pointeraliasing"><i>based</i></a> on the argument or return
-      value do not alias pointer values which are not <i>based</i> on it,
-      ignoring certain "irrelevant" dependencies.
-      For a call to the parent function, dependencies between memory
-      references from before or after the call and from those during the call
-      are "irrelevant" to the <tt>noalias</tt> keyword for the arguments and
-      return value used in that call.
-      The caller shares the responsibility with the callee for ensuring that
-      these requirements are met.
-      For further details, please see the discussion of the NoAlias response in
-      <a href="AliasAnalysis.html#MustMayNo">alias analysis</a>.<br>
-<br>
-      Note that this definition of <tt>noalias</tt> is intentionally
-      similar to the definition of <tt>restrict</tt> in C99 for function
-      arguments, though it is slightly weaker.
-<br>
-      For function return values, C99's <tt>restrict</tt> is not meaningful,
-      while LLVM's <tt>noalias</tt> is.
-      </dd>
-
-  <dt><tt><b><a name="nocapture">nocapture</a></b></tt></dt>
-  <dd>This indicates that the callee does not make any copies of the pointer
-      that outlive the callee itself. This is not a valid attribute for return
-      values.</dd>
-
-  <dt><tt><b><a name="nest">nest</a></b></tt></dt>
-  <dd>This indicates that the pointer parameter can be excised using the
-      <a href="#int_trampoline">trampoline intrinsics</a>. This is not a valid
-      attribute for return values.</dd>
-</dl>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="gc">Garbage Collector Names</a>
-</h3>
-
-<div>
-
-<p>Each function may specify a garbage collector name, which is simply a
-   string:</p>
-
-<pre class="doc_code">
-define void @f() gc "name" { ... }
-</pre>
-
-<p>The compiler declares the supported values of <i>name</i>. Specifying a
-   collector which will cause the compiler to alter its output in order to
-   support the named garbage collection algorithm.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="fnattrs">Function Attributes</a>
-</h3>
-
-<div>
-
-<p>Function attributes are set to communicate additional information about a
-   function. Function attributes are considered to be part of the function, not
-   of the function type, so functions with different function attributes can
-   have the same function type.</p>
-
-<p>Function attributes are simple keywords that follow the type specified. If
-   multiple attributes are needed, they are space separated. For example:</p>
-
-<pre class="doc_code">
-define void @f() noinline { ... }
-define void @f() alwaysinline { ... }
-define void @f() alwaysinline optsize { ... }
-define void @f() optsize { ... }
-</pre>
-
-<dl>
-  <dt><tt><b>address_safety</b></tt></dt>
-  <dd>This attribute indicates that the address safety analysis
-  is enabled for this function.  </dd>
-
-  <dt><tt><b>alignstack(&lt;<em>n</em>&gt;)</b></tt></dt>
-  <dd>This attribute indicates that, when emitting the prologue and epilogue,
-      the backend should forcibly align the stack pointer. Specify the
-      desired alignment, which must be a power of two, in parentheses.
-
-  <dt><tt><b>alwaysinline</b></tt></dt>
-  <dd>This attribute indicates that the inliner should attempt to inline this
-      function into callers whenever possible, ignoring any active inlining size
-      threshold for this caller.</dd>
-
-  <dt><tt><b>nonlazybind</b></tt></dt>
-  <dd>This attribute suppresses lazy symbol binding for the function. This
-      may make calls to the function faster, at the cost of extra program
-      startup time if the function is not called during program startup.</dd>
-
-  <dt><tt><b>inlinehint</b></tt></dt>
-  <dd>This attribute indicates that the source code contained a hint that inlining
-      this function is desirable (such as the "inline" keyword in C/C++).  It
-      is just a hint; it imposes no requirements on the inliner.</dd>
-
-  <dt><tt><b>naked</b></tt></dt>
-  <dd>This attribute disables prologue / epilogue emission for the function.
-      This can have very system-specific consequences.</dd>
-
-  <dt><tt><b>noimplicitfloat</b></tt></dt>
-  <dd>This attributes disables implicit floating point instructions.</dd>
-
-  <dt><tt><b>noinline</b></tt></dt>
-  <dd>This attribute indicates that the inliner should never inline this
-      function in any situation. This attribute may not be used together with
-      the <tt>alwaysinline</tt> attribute.</dd>
-
-  <dt><tt><b>noredzone</b></tt></dt>
-  <dd>This attribute indicates that the code generator should not use a red
-      zone, even if the target-specific ABI normally permits it.</dd>
-
-  <dt><tt><b>noreturn</b></tt></dt>
-  <dd>This function attribute indicates that the function never returns
-      normally.  This produces undefined behavior at runtime if the function
-      ever does dynamically return.</dd>
-
-  <dt><tt><b>nounwind</b></tt></dt>
-  <dd>This function attribute indicates that the function never returns with an
-      unwind or exceptional control flow.  If the function does unwind, its
-      runtime behavior is undefined.</dd>
-
-  <dt><tt><b>optsize</b></tt></dt>
-  <dd>This attribute suggests that optimization passes and code generator passes
-      make choices that keep the code size of this function low, and otherwise
-      do optimizations specifically to reduce code size.</dd>
-
-  <dt><tt><b>readnone</b></tt></dt>
-  <dd>This attribute indicates that the function computes its result (or decides
-      to unwind an exception) based strictly on its arguments, without
-      dereferencing any pointer arguments or otherwise accessing any mutable
-      state (e.g. memory, control registers, etc) visible to caller functions.
-      It does not write through any pointer arguments
-      (including <tt><a href="#byval">byval</a></tt> arguments) and never
-      changes any state visible to callers.  This means that it cannot unwind
-      exceptions by calling the <tt>C++</tt> exception throwing methods.</dd>
-
-  <dt><tt><b><a name="readonly">readonly</a></b></tt></dt>
-  <dd>This attribute indicates that the function does not write through any
-      pointer arguments (including <tt><a href="#byval">byval</a></tt>
-      arguments) or otherwise modify any state (e.g. memory, control registers,
-      etc) visible to caller functions.  It may dereference pointer arguments
-      and read state that may be set in the caller.  A readonly function always
-      returns the same value (or unwinds an exception identically) when called
-      with the same set of arguments and global state.  It cannot unwind an
-      exception by calling the <tt>C++</tt> exception throwing methods.</dd>
-
-  <dt><tt><b><a name="returns_twice">returns_twice</a></b></tt></dt>
-  <dd>This attribute indicates that this function can return twice. The
-      C <code>setjmp</code> is an example of such a function.  The compiler
-      disables some optimizations (like tail calls) in the caller of these
-      functions.</dd>
-
-  <dt><tt><b><a name="ssp">ssp</a></b></tt></dt>
-  <dd>This attribute indicates that the function should emit a stack smashing
-      protector. It is in the form of a "canary"&mdash;a random value placed on
-      the stack before the local variables that's checked upon return from the
-      function to see if it has been overwritten. A heuristic is used to
-      determine if a function needs stack protectors or not.<br>
-<br>
-      If a function that has an <tt>ssp</tt> attribute is inlined into a
-      function that doesn't have an <tt>ssp</tt> attribute, then the resulting
-      function will have an <tt>ssp</tt> attribute.</dd>
-
-  <dt><tt><b>sspreq</b></tt></dt>
-  <dd>This attribute indicates that the function should <em>always</em> emit a
-      stack smashing protector. This overrides
-      the <tt><a href="#ssp">ssp</a></tt> function attribute.<br>
-<br>
-      If a function that has an <tt>sspreq</tt> attribute is inlined into a
-      function that doesn't have an <tt>sspreq</tt> attribute or which has
-      an <tt>ssp</tt> attribute, then the resulting function will have
-      an <tt>sspreq</tt> attribute.</dd>
-
-  <dt><tt><b><a name="uwtable">uwtable</a></b></tt></dt>
-  <dd>This attribute indicates that the ABI being targeted requires that
-      an unwind table entry be produce for this function even if we can
-      show that no exceptions passes by it. This is normally the case for
-      the ELF x86-64 abi, but it can be disabled for some compilation
-      units.</dd>
-</dl>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="moduleasm">Module-Level Inline Assembly</a>
-</h3>
-
-<div>
-
-<p>Modules may contain "module-level inline asm" blocks, which corresponds to
-   the GCC "file scope inline asm" blocks.  These blocks are internally
-   concatenated by LLVM and treated as a single unit, but may be separated in
-   the <tt>.ll</tt> file if desired.  The syntax is very simple:</p>
-
-<pre class="doc_code">
-module asm "inline asm code goes here"
-module asm "more can go here"
-</pre>
-
-<p>The strings can contain any character by escaping non-printable characters.
-   The escape sequence used is simply "\xx" where "xx" is the two digit hex code
-   for the number.</p>
-
-<p>The inline asm code is simply printed to the machine code .s file when
-   assembly code is generated.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="datalayout">Data Layout</a>
-</h3>
-
-<div>
-
-<p>A module may specify a target specific data layout string that specifies how
-   data is to be laid out in memory. The syntax for the data layout is
-   simply:</p>
-
-<pre class="doc_code">
-target datalayout = "<i>layout specification</i>"
-</pre>
-
-<p>The <i>layout specification</i> consists of a list of specifications
-   separated by the minus sign character ('-').  Each specification starts with
-   a letter and may include other information after the letter to define some
-   aspect of the data layout.  The specifications accepted are as follows:</p>
-
-<dl>
-  <dt><tt>E</tt></dt>
-  <dd>Specifies that the target lays out data in big-endian form. That is, the
-      bits with the most significance have the lowest address location.</dd>
-
-  <dt><tt>e</tt></dt>
-  <dd>Specifies that the target lays out data in little-endian form. That is,
-      the bits with the least significance have the lowest address
-      location.</dd>
-
-  <dt><tt>S<i>size</i></tt></dt>
-  <dd>Specifies the natural alignment of the stack in bits. Alignment promotion
-      of stack variables is limited to the natural stack alignment to avoid
-      dynamic stack realignment. The stack alignment must be a multiple of
-      8-bits. If omitted, the natural stack alignment defaults to "unspecified",
-      which does not prevent any alignment promotions.</dd>
-
-  <dt><tt>p[n]:<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
-  <dd>This specifies the <i>size</i> of a pointer and its <i>abi</i> and
-      <i>preferred</i> alignments for address space <i>n</i>. All sizes are in
-      bits. Specifying the <i>pref</i> alignment is optional. If omitted, the
-      preceding <tt>:</tt> should be omitted too. The address space,
-      <i>n</i> is optional, and if not specified, denotes the default address
-      space 0. The value of <i>n</i> must be in the range [1,2^23).</dd>
-
-  <dt><tt>i<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
-  <dd>This specifies the alignment for an integer type of a given bit
-      <i>size</i>. The value of <i>size</i> must be in the range [1,2^23).</dd>
-
-  <dt><tt>v<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
-  <dd>This specifies the alignment for a vector type of a given bit
-      <i>size</i>.</dd>
-
-  <dt><tt>f<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
-  <dd>This specifies the alignment for a floating point type of a given bit
-      <i>size</i>. Only values of <i>size</i> that are supported by the target
-      will work.  32 (float) and 64 (double) are supported on all targets;
-      80 or 128 (different flavors of long double) are also supported on some
-      targets.
-
-  <dt><tt>a<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
-  <dd>This specifies the alignment for an aggregate type of a given bit
-      <i>size</i>.</dd>
-
-  <dt><tt>s<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
-  <dd>This specifies the alignment for a stack object of a given bit
-      <i>size</i>.</dd>
-
-  <dt><tt>n<i>size1</i>:<i>size2</i>:<i>size3</i>...</tt></dt>
-  <dd>This specifies a set of native integer widths for the target CPU
-      in bits.  For example, it might contain "n32" for 32-bit PowerPC,
-      "n32:64" for PowerPC 64, or "n8:16:32:64" for X86-64.  Elements of
-      this set are considered to support most general arithmetic
-      operations efficiently.</dd>
-</dl>
-
-<p>When constructing the data layout for a given target, LLVM starts with a
-   default set of specifications which are then (possibly) overridden by the
-   specifications in the <tt>datalayout</tt> keyword. The default specifications
-   are given in this list:</p>
-
-<ul>
-  <li><tt>E</tt> - big endian</li>
-  <li><tt>p:64:64:64</tt> - 64-bit pointers with 64-bit alignment</li>
-  <li><tt>p1:32:32:32</tt> - 32-bit pointers with 32-bit alignment for
-  address space 1</li>
-  <li><tt>p2:16:32:32</tt> - 16-bit pointers with 32-bit alignment for
-  address space 2</li>
-  <li><tt>i1:8:8</tt> - i1 is 8-bit (byte) aligned</li>
-  <li><tt>i8:8:8</tt> - i8 is 8-bit (byte) aligned</li>
-  <li><tt>i16:16:16</tt> - i16 is 16-bit aligned</li>
-  <li><tt>i32:32:32</tt> - i32 is 32-bit aligned</li>
-  <li><tt>i64:32:64</tt> - i64 has ABI alignment of 32-bits but preferred
-  alignment of 64-bits</li>
-  <li><tt>f32:32:32</tt> - float is 32-bit aligned</li>
-  <li><tt>f64:64:64</tt> - double is 64-bit aligned</li>
-  <li><tt>v64:64:64</tt> - 64-bit vector is 64-bit aligned</li>
-  <li><tt>v128:128:128</tt> - 128-bit vector is 128-bit aligned</li>
-  <li><tt>a0:0:1</tt> - aggregates are 8-bit aligned</li>
-  <li><tt>s0:64:64</tt> - stack objects are 64-bit aligned</li>
-</ul>
-
-<p>When LLVM is determining the alignment for a given type, it uses the
-   following rules:</p>
-
-<ol>
-  <li>If the type sought is an exact match for one of the specifications, that
-      specification is used.</li>
-
-  <li>If no match is found, and the type sought is an integer type, then the
-      smallest integer type that is larger than the bitwidth of the sought type
-      is used. If none of the specifications are larger than the bitwidth then
-      the largest integer type is used. For example, given the default
-      specifications above, the i7 type will use the alignment of i8 (next
-      largest) while both i65 and i256 will use the alignment of i64 (largest
-      specified).</li>
-
-  <li>If no match is found, and the type sought is a vector type, then the
-      largest vector type that is smaller than the sought vector type will be
-      used as a fall back.  This happens because &lt;128 x double&gt; can be
-      implemented in terms of 64 &lt;2 x double&gt;, for example.</li>
-</ol>
-
-<p>The function of the data layout string may not be what you expect.  Notably,
-   this is not a specification from the frontend of what alignment the code
-   generator should use.</p>
-
-<p>Instead, if specified, the target data layout is required to match what the
-   ultimate <em>code generator</em> expects.  This string is used by the
-   mid-level optimizers to
-   improve code, and this only works if it matches what the ultimate code
-   generator uses.  If you would like to generate IR that does not embed this
-   target-specific detail into the IR, then you don't have to specify the
-   string.  This will disable some optimizations that require precise layout
-   information, but this also prevents those optimizations from introducing
-   target specificity into the IR.</p>
-
-
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="pointeraliasing">Pointer Aliasing Rules</a>
-</h3>
-
-<div>
-
-<p>Any memory access must be done through a pointer value associated
-with an address range of the memory access, otherwise the behavior
-is undefined. Pointer values are associated with address ranges
-according to the following rules:</p>
-
-<ul>
-  <li>A pointer value is associated with the addresses associated with
-      any value it is <i>based</i> on.
-  <li>An address of a global variable is associated with the address
-      range of the variable's storage.</li>
-  <li>The result value of an allocation instruction is associated with
-      the address range of the allocated storage.</li>
-  <li>A null pointer in the default address-space is associated with
-      no address.</li>
-  <li>An integer constant other than zero or a pointer value returned
-      from a function not defined within LLVM may be associated with address
-      ranges allocated through mechanisms other than those provided by
-      LLVM. Such ranges shall not overlap with any ranges of addresses
-      allocated by mechanisms provided by LLVM.</li>
-</ul>
-
-<p>A pointer value is <i>based</i> on another pointer value according
-   to the following rules:</p>
-
-<ul>
-  <li>A pointer value formed from a
-      <tt><a href="#i_getelementptr">getelementptr</a></tt> operation
-      is <i>based</i> on the first operand of the <tt>getelementptr</tt>.</li>
-  <li>The result value of a
-      <tt><a href="#i_bitcast">bitcast</a></tt> is <i>based</i> on the operand
-      of the <tt>bitcast</tt>.</li>
-  <li>A pointer value formed by an
-      <tt><a href="#i_inttoptr">inttoptr</a></tt> is <i>based</i> on all
-      pointer values that contribute (directly or indirectly) to the
-      computation of the pointer's value.</li>
-  <li>The "<i>based</i> on" relationship is transitive.</li>
-</ul>
-
-<p>Note that this definition of <i>"based"</i> is intentionally
-   similar to the definition of <i>"based"</i> in C99, though it is
-   slightly weaker.</p>
-
-<p>LLVM IR does not associate types with memory. The result type of a
-<tt><a href="#i_load">load</a></tt> merely indicates the size and
-alignment of the memory from which to load, as well as the
-interpretation of the value. The first operand type of a
-<tt><a href="#i_store">store</a></tt> similarly only indicates the size
-and alignment of the store.</p>
-
-<p>Consequently, type-based alias analysis, aka TBAA, aka
-<tt>-fstrict-aliasing</tt>, is not applicable to general unadorned
-LLVM IR. <a href="#metadata">Metadata</a> may be used to encode
-additional information which specialized optimization passes may use
-to implement type-based alias analysis.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="volatile">Volatile Memory Accesses</a>
-</h3>
-
-<div>
-
-<p>Certain memory accesses, such as <a href="#i_load"><tt>load</tt></a>s, <a
-href="#i_store"><tt>store</tt></a>s, and <a
-href="#int_memcpy"><tt>llvm.memcpy</tt></a>s may be marked <tt>volatile</tt>.
-The optimizers must not change the number of volatile operations or change their
-order of execution relative to other volatile operations.  The optimizers
-<i>may</i> change the order of volatile operations relative to non-volatile
-operations.  This is not Java's "volatile" and has no cross-thread
-synchronization behavior.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="memmodel">Memory Model for Concurrent Operations</a>
-</h3>
-
-<div>
-
-<p>The LLVM IR does not define any way to start parallel threads of execution
-or to register signal handlers. Nonetheless, there are platform-specific
-ways to create them, and we define LLVM IR's behavior in their presence. This
-model is inspired by the C++0x memory model.</p>
-
-<p>For a more informal introduction to this model, see the
-<a href="Atomics.html">LLVM Atomic Instructions and Concurrency Guide</a>.
-
-<p>We define a <i>happens-before</i> partial order as the least partial order
-that</p>
-<ul>
-  <li>Is a superset of single-thread program order, and</li>
-  <li>When a <i>synchronizes-with</i> <tt>b</tt>, includes an edge from
-      <tt>a</tt> to <tt>b</tt>. <i>Synchronizes-with</i> pairs are introduced
-      by platform-specific techniques, like pthread locks, thread
-      creation, thread joining, etc., and by atomic instructions.
-      (See also <a href="#ordering">Atomic Memory Ordering Constraints</a>).
-      </li>
-</ul>
-
-<p>Note that program order does not introduce <i>happens-before</i> edges
-between a thread and signals executing inside that thread.</p>
-
-<p>Every (defined) read operation (load instructions, memcpy, atomic
-loads/read-modify-writes, etc.) <var>R</var> reads a series of bytes written by
-(defined) write operations (store instructions, atomic
-stores/read-modify-writes, memcpy, etc.). For the purposes of this section,
-initialized globals are considered to have a write of the initializer which is
-atomic and happens before any other read or write of the memory in question.
-For each byte of a read <var>R</var>, <var>R<sub>byte</sub></var> may see
-any write to the same byte, except:</p>
-
-<ul>
-  <li>If <var>write<sub>1</sub></var> happens before
-      <var>write<sub>2</sub></var>, and <var>write<sub>2</sub></var> happens
-      before <var>R<sub>byte</sub></var>, then <var>R<sub>byte</sub></var>
-      does not see <var>write<sub>1</sub></var>.
-  <li>If <var>R<sub>byte</sub></var> happens before
-      <var>write<sub>3</sub></var>, then <var>R<sub>byte</sub></var> does not
-      see <var>write<sub>3</sub></var>.
-</ul>
-
-<p>Given that definition, <var>R<sub>byte</sub></var> is defined as follows:
-<ul>
-  <li>If <var>R</var> is volatile, the result is target-dependent. (Volatile
-      is supposed to give guarantees which can support
-      <code>sig_atomic_t</code> in C/C++, and may be used for accesses to
-      addresses which do not behave like normal memory.  It does not generally
-      provide cross-thread synchronization.)
-  <li>Otherwise, if there is no write to the same byte that happens before
-    <var>R<sub>byte</sub></var>, <var>R<sub>byte</sub></var> returns
-    <tt>undef</tt> for that byte.
-  <li>Otherwise, if <var>R<sub>byte</sub></var> may see exactly one write,
-      <var>R<sub>byte</sub></var> returns the value written by that
-      write.</li>
-  <li>Otherwise, if <var>R</var> is atomic, and all the writes
-      <var>R<sub>byte</sub></var> may see are atomic, it chooses one of the
-      values written.  See the <a href="#ordering">Atomic Memory Ordering
-      Constraints</a> section for additional constraints on how the choice
-      is made.
-  <li>Otherwise <var>R<sub>byte</sub></var> returns <tt>undef</tt>.</li>
-</ul>
-
-<p><var>R</var> returns the value composed of the series of bytes it read.
-This implies that some bytes within the value may be <tt>undef</tt>
-<b>without</b> the entire value being <tt>undef</tt>. Note that this only
-defines the semantics of the operation; it doesn't mean that targets will
-emit more than one instruction to read the series of bytes.</p>
-
-<p>Note that in cases where none of the atomic intrinsics are used, this model
-places only one restriction on IR transformations on top of what is required
-for single-threaded execution: introducing a store to a byte which might not
-otherwise be stored is not allowed in general.  (Specifically, in the case
-where another thread might write to and read from an address, introducing a
-store can change a load that may see exactly one write into a load that may
-see multiple writes.)</p>
-
-<!-- FIXME: This model assumes all targets where concurrency is relevant have
-a byte-size store which doesn't affect adjacent bytes.  As far as I can tell,
-none of the backends currently in the tree fall into this category; however,
-there might be targets which care.  If there are, we want a paragraph
-like the following:
-
-Targets may specify that stores narrower than a certain width are not
-available; on such a target, for the purposes of this model, treat any
-non-atomic write with an alignment or width less than the minimum width
-as if it writes to the relevant surrounding bytes.
--->
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-      <a name="ordering">Atomic Memory Ordering Constraints</a>
-</h3>
-
-<div>
-
-<p>Atomic instructions (<a href="#i_cmpxchg"><code>cmpxchg</code></a>,
-<a href="#i_atomicrmw"><code>atomicrmw</code></a>,
-<a href="#i_fence"><code>fence</code></a>,
-<a href="#i_load"><code>atomic load</code></a>, and
-<a href="#i_store"><code>atomic store</code></a>) take an ordering parameter
-that determines which other atomic instructions on the same address they
-<i>synchronize with</i>.  These semantics are borrowed from Java and C++0x,
-but are somewhat more colloquial. If these descriptions aren't precise enough,
-check those specs (see spec references in the
-<a href="Atomics.html#introduction">atomics guide</a>).
-<a href="#i_fence"><code>fence</code></a> instructions
-treat these orderings somewhat differently since they don't take an address.
-See that instruction's documentation for details.</p>
-
-<p>For a simpler introduction to the ordering constraints, see the
-<a href="Atomics.html">LLVM Atomic Instructions and Concurrency Guide</a>.</p>
-
-<dl>
-<dt><code>unordered</code></dt>
-<dd>The set of values that can be read is governed by the happens-before
-partial order. A value cannot be read unless some operation wrote it.
-This is intended to provide a guarantee strong enough to model Java's
-non-volatile shared variables.  This ordering cannot be specified for
-read-modify-write operations; it is not strong enough to make them atomic
-in any interesting way.</dd>
-<dt><code>monotonic</code></dt>
-<dd>In addition to the guarantees of <code>unordered</code>, there is a single
-total order for modifications by <code>monotonic</code> operations on each
-address. All modification orders must be compatible with the happens-before
-order. There is no guarantee that the modification orders can be combined to
-a global total order for the whole program (and this often will not be
-possible). The read in an atomic read-modify-write operation
-(<a href="#i_cmpxchg"><code>cmpxchg</code></a> and
-<a href="#i_atomicrmw"><code>atomicrmw</code></a>)
-reads the value in the modification order immediately before the value it
-writes. If one atomic read happens before another atomic read of the same
-address, the later read must see the same value or a later value in the
-address's modification order. This disallows reordering of
-<code>monotonic</code> (or stronger) operations on the same address. If an
-address is written <code>monotonic</code>ally by one thread, and other threads
-<code>monotonic</code>ally read that address repeatedly, the other threads must
-eventually see the write. This corresponds to the C++0x/C1x
-<code>memory_order_relaxed</code>.</dd>
-<dt><code>acquire</code></dt>
-<dd>In addition to the guarantees of <code>monotonic</code>,
-a <i>synchronizes-with</i> edge may be formed with a <code>release</code>
-operation. This is intended to model C++'s <code>memory_order_acquire</code>.</dd>
-<dt><code>release</code></dt>
-<dd>In addition to the guarantees of <code>monotonic</code>, if this operation
-writes a value which is subsequently read by an <code>acquire</code> operation,
-it <i>synchronizes-with</i> that operation.  (This isn't a complete
-description; see the C++0x definition of a release sequence.) This corresponds
-to the C++0x/C1x <code>memory_order_release</code>.</dd>
-<dt><code>acq_rel</code> (acquire+release)</dt><dd>Acts as both an
-<code>acquire</code> and <code>release</code> operation on its address.
-This corresponds to the C++0x/C1x <code>memory_order_acq_rel</code>.</dd>
-<dt><code>seq_cst</code> (sequentially consistent)</dt><dd>
-<dd>In addition to the guarantees of <code>acq_rel</code>
-(<code>acquire</code> for an operation which only reads, <code>release</code>
-for an operation which only writes), there is a global total order on all
-sequentially-consistent operations on all addresses, which is consistent with
-the <i>happens-before</i> partial order and with the modification orders of
-all the affected addresses. Each sequentially-consistent read sees the last
-preceding write to the same address in this global order. This corresponds
-to the C++0x/C1x <code>memory_order_seq_cst</code> and Java volatile.</dd>
-</dl>
-
-<p id="singlethread">If an atomic operation is marked <code>singlethread</code>,
-it only <i>synchronizes with</i> or participates in modification and seq_cst
-total orderings with other operations running in the same thread (for example,
-in signal handlers).</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="typesystem">Type System</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The LLVM type system is one of the most important features of the
-   intermediate representation.  Being typed enables a number of optimizations
-   to be performed on the intermediate representation directly, without having
-   to do extra analyses on the side before the transformation.  A strong type
-   system makes it easier to read the generated code and enables novel analyses
-   and transformations that are not feasible to perform on normal three address
-   code representations.</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="t_classifications">Type Classifications</a>
-</h3>
-
-<div>
-
-<p>The types fall into a few useful classifications:</p>
-
-<table border="1" cellspacing="0" cellpadding="4">
-  <tbody>
-    <tr><th>Classification</th><th>Types</th></tr>
-    <tr>
-      <td><a href="#t_integer">integer</a></td>
-      <td><tt>i1, i2, i3, ... i8, ... i16, ... i32, ... i64, ... </tt></td>
-    </tr>
-    <tr>
-      <td><a href="#t_floating">floating point</a></td>
-      <td><tt>half, float, double, x86_fp80, fp128, ppc_fp128</tt></td>
-    </tr>
-    <tr>
-      <td><a name="t_firstclass">first class</a></td>
-      <td><a href="#t_integer">integer</a>,
-          <a href="#t_floating">floating point</a>,
-          <a href="#t_pointer">pointer</a>,
-          <a href="#t_vector">vector</a>,
-          <a href="#t_struct">structure</a>,
-          <a href="#t_array">array</a>,
-          <a href="#t_label">label</a>,
-          <a href="#t_metadata">metadata</a>.
-      </td>
-    </tr>
-    <tr>
-      <td><a href="#t_primitive">primitive</a></td>
-      <td><a href="#t_label">label</a>,
-          <a href="#t_void">void</a>,
-          <a href="#t_integer">integer</a>,
-          <a href="#t_floating">floating point</a>,
-          <a href="#t_x86mmx">x86mmx</a>,
-          <a href="#t_metadata">metadata</a>.</td>
-    </tr>
-    <tr>
-      <td><a href="#t_derived">derived</a></td>
-      <td><a href="#t_array">array</a>,
-          <a href="#t_function">function</a>,
-          <a href="#t_pointer">pointer</a>,
-          <a href="#t_struct">structure</a>,
-          <a href="#t_vector">vector</a>,
-          <a href="#t_opaque">opaque</a>.
-      </td>
-    </tr>
-  </tbody>
-</table>
-
-<p>The <a href="#t_firstclass">first class</a> types are perhaps the most
-   important.  Values of these types are the only ones which can be produced by
-   instructions.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="t_primitive">Primitive Types</a>
-</h3>
-
-<div>
-
-<p>The primitive types are the fundamental building blocks of the LLVM
-   system.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_integer">Integer Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The integer type is a very simple type that simply specifies an arbitrary
-   bit width for the integer type desired. Any bit width from 1 bit to
-   2<sup>23</sup>-1 (about 8 million) can be specified.</p>
-
-<h5>Syntax:</h5>
-<pre>
-  iN
-</pre>
-
-<p>The number of bits the integer will occupy is specified by the <tt>N</tt>
-   value.</p>
-
-<h5>Examples:</h5>
-<table class="layout">
-  <tr class="layout">
-    <td class="left"><tt>i1</tt></td>
-    <td class="left">a single-bit integer.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>i32</tt></td>
-    <td class="left">a 32-bit integer.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>i1942652</tt></td>
-    <td class="left">a really big integer of over 1 million bits.</td>
-  </tr>
-</table>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_floating">Floating Point Types</a>
-</h4>
-
-<div>
-
-<table>
-  <tbody>
-    <tr><th>Type</th><th>Description</th></tr>
-    <tr><td><tt>half</tt></td><td>16-bit floating point value</td></tr>
-    <tr><td><tt>float</tt></td><td>32-bit floating point value</td></tr>
-    <tr><td><tt>double</tt></td><td>64-bit floating point value</td></tr>
-    <tr><td><tt>fp128</tt></td><td>128-bit floating point value (112-bit mantissa)</td></tr>
-    <tr><td><tt>x86_fp80</tt></td><td>80-bit floating point value (X87)</td></tr>
-    <tr><td><tt>ppc_fp128</tt></td><td>128-bit floating point value (two 64-bits)</td></tr>
-  </tbody>
-</table>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_x86mmx">X86mmx Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The x86mmx type represents a value held in an MMX register on an x86 machine.  The operations allowed on it are quite limited:  parameters and return values, load and store, and bitcast.  User-specified MMX instructions are represented as intrinsic or asm calls with arguments and/or results of this type.  There are no arrays, vectors or constants of this type.</p>
-
-<h5>Syntax:</h5>
-<pre>
-  x86mmx
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_void">Void Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The void type does not represent any value and has no size.</p>
-
-<h5>Syntax:</h5>
-<pre>
-  void
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_label">Label Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The label type represents code labels.</p>
-
-<h5>Syntax:</h5>
-<pre>
-  label
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_metadata">Metadata Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The metadata type represents embedded metadata. No derived types may be
-   created from metadata except for <a href="#t_function">function</a>
-   arguments.
-
-<h5>Syntax:</h5>
-<pre>
-  metadata
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="t_derived">Derived Types</a>
-</h3>
-
-<div>
-
-<p>The real power in LLVM comes from the derived types in the system.  This is
-   what allows a programmer to represent arrays, functions, pointers, and other
-   useful types.  Each of these types contain one or more element types which
-   may be a primitive type, or another derived type.  For example, it is
-   possible to have a two dimensional array, using an array as the element type
-   of another array.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_aggregate">Aggregate Types</a>
-</h4>
-
-<div>
-
-<p>Aggregate Types are a subset of derived types that can contain multiple
-  member types. <a href="#t_array">Arrays</a> and
-  <a href="#t_struct">structs</a> are aggregate types.
-  <a href="#t_vector">Vectors</a> are not considered to be aggregate types.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_array">Array Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The array type is a very simple derived type that arranges elements
-   sequentially in memory.  The array type requires a size (number of elements)
-   and an underlying data type.</p>
-
-<h5>Syntax:</h5>
-<pre>
-  [&lt;# elements&gt; x &lt;elementtype&gt;]
-</pre>
-
-<p>The number of elements is a constant integer value; <tt>elementtype</tt> may
-   be any type with a size.</p>
-
-<h5>Examples:</h5>
-<table class="layout">
-  <tr class="layout">
-    <td class="left"><tt>[40 x i32]</tt></td>
-    <td class="left">Array of 40 32-bit integer values.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>[41 x i32]</tt></td>
-    <td class="left">Array of 41 32-bit integer values.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>[4 x i8]</tt></td>
-    <td class="left">Array of 4 8-bit integer values.</td>
-  </tr>
-</table>
-<p>Here are some examples of multidimensional arrays:</p>
-<table class="layout">
-  <tr class="layout">
-    <td class="left"><tt>[3 x [4 x i32]]</tt></td>
-    <td class="left">3x4 array of 32-bit integer values.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>[12 x [10 x float]]</tt></td>
-    <td class="left">12x10 array of single precision floating point values.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>[2 x [3 x [4 x i16]]]</tt></td>
-    <td class="left">2x3x4 array of 16-bit integer  values.</td>
-  </tr>
-</table>
-
-<p>There is no restriction on indexing beyond the end of the array implied by
-   a static type (though there are restrictions on indexing beyond the bounds
-   of an allocated object in some cases). This means that single-dimension
-   'variable sized array' addressing can be implemented in LLVM with a zero
-   length array type. An implementation of 'pascal style arrays' in LLVM could
-   use the type "<tt>{ i32, [0 x float]}</tt>", for example.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_function">Function Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The function type can be thought of as a function signature.  It consists of
-   a return type and a list of formal parameter types. The return type of a
-   function type is a first class type or a void type.</p>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;returntype&gt; (&lt;parameter list&gt;)
-</pre>
-
-<p>...where '<tt>&lt;parameter list&gt;</tt>' is a comma-separated list of type
-   specifiers.  Optionally, the parameter list may include a type <tt>...</tt>,
-   which indicates that the function takes a variable number of arguments.
-   Variable argument functions can access their arguments with
-   the <a href="#int_varargs">variable argument handling intrinsic</a>
-   functions.  '<tt>&lt;returntype&gt;</tt>' is any type except
-   <a href="#t_label">label</a>.</p>
-
-<h5>Examples:</h5>
-<table class="layout">
-  <tr class="layout">
-    <td class="left"><tt>i32 (i32)</tt></td>
-    <td class="left">function taking an <tt>i32</tt>, returning an <tt>i32</tt>
-    </td>
-  </tr><tr class="layout">
-    <td class="left"><tt>float&nbsp;(i16,&nbsp;i32&nbsp;*)&nbsp;*
-    </tt></td>
-    <td class="left"><a href="#t_pointer">Pointer</a> to a function that takes
-      an <tt>i16</tt> and a <a href="#t_pointer">pointer</a> to <tt>i32</tt>,
-      returning <tt>float</tt>.
-    </td>
-  </tr><tr class="layout">
-    <td class="left"><tt>i32 (i8*, ...)</tt></td>
-    <td class="left">A vararg function that takes at least one
-      <a href="#t_pointer">pointer</a> to <tt>i8 </tt> (char in C),
-      which returns an integer.  This is the signature for <tt>printf</tt> in
-      LLVM.
-    </td>
-  </tr><tr class="layout">
-    <td class="left"><tt>{i32, i32} (i32)</tt></td>
-    <td class="left">A function taking an <tt>i32</tt>, returning a
-        <a href="#t_struct">structure</a> containing two <tt>i32</tt> values
-    </td>
-  </tr>
-</table>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_struct">Structure Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The structure type is used to represent a collection of data members together
-  in memory.  The elements of a structure may be any type that has a size.</p>
-
-<p>Structures in memory are accessed using '<tt><a href="#i_load">load</a></tt>'
-   and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a field
-   with the '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.
-   Structures in registers are accessed using the
-   '<tt><a href="#i_extractvalue">extractvalue</a></tt>' and
-   '<tt><a href="#i_insertvalue">insertvalue</a></tt>' instructions.</p>
-
-<p>Structures may optionally be "packed" structures, which indicate that the
-  alignment of the struct is one byte, and that there is no padding between
-  the elements.  In non-packed structs, padding between field types is inserted
-  as defined by the DataLayout string in the module, which is required to match
-  what the underlying code generator expects.</p>
-
-<p>Structures can either be "literal" or "identified".  A literal structure is
-  defined inline with other types (e.g. <tt>{i32, i32}*</tt>) whereas identified
-  types are always defined at the top level with a name.  Literal types are
-  uniqued by their contents and can never be recursive or opaque since there is
-  no way to write one.  Identified types can be recursive, can be opaqued, and are
-  never uniqued.
-</p>
-
-<h5>Syntax:</h5>
-<pre>
-  %T1 = type { &lt;type list&gt; }     <i>; Identified normal struct type</i>
-  %T2 = type &lt;{ &lt;type list&gt; }&gt;   <i>; Identified packed struct type</i>
-</pre>
-
-<h5>Examples:</h5>
-<table class="layout">
-  <tr class="layout">
-    <td class="left"><tt>{ i32, i32, i32 }</tt></td>
-    <td class="left">A triple of three <tt>i32</tt> values</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>{&nbsp;float,&nbsp;i32&nbsp;(i32)&nbsp;*&nbsp;}</tt></td>
-    <td class="left">A pair, where the first element is a <tt>float</tt> and the
-      second element is a <a href="#t_pointer">pointer</a> to a
-      <a href="#t_function">function</a> that takes an <tt>i32</tt>, returning
-      an <tt>i32</tt>.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>&lt;{ i8, i32 }&gt;</tt></td>
-    <td class="left">A packed struct known to be 5 bytes in size.</td>
-  </tr>
-</table>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_opaque">Opaque Structure Types</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>Opaque structure types are used to represent named structure types that do
-   not have a body specified.  This corresponds (for example) to the C notion of
-   a forward declared structure.</p>
-
-<h5>Syntax:</h5>
-<pre>
-  %X = type opaque
-  %52 = type opaque
-</pre>
-
-<h5>Examples:</h5>
-<table class="layout">
-  <tr class="layout">
-    <td class="left"><tt>opaque</tt></td>
-    <td class="left">An opaque type.</td>
-  </tr>
-</table>
-
-</div>
-
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_pointer">Pointer Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>The pointer type is used to specify memory locations.
-   Pointers are commonly used to reference objects in memory.</p>
-
-<p>Pointer types may have an optional address space attribute defining the
-   numbered address space where the pointed-to object resides. The default
-   address space is number zero. The semantics of non-zero address
-   spaces are target-specific.</p>
-
-<p>Note that LLVM does not permit pointers to void (<tt>void*</tt>) nor does it
-   permit pointers to labels (<tt>label*</tt>).  Use <tt>i8*</tt> instead.</p>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;type&gt; *
-</pre>
-
-<h5>Examples:</h5>
-<table class="layout">
-  <tr class="layout">
-    <td class="left"><tt>[4 x i32]*</tt></td>
-    <td class="left">A <a href="#t_pointer">pointer</a> to <a
-                    href="#t_array">array</a> of four <tt>i32</tt> values.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>i32 (i32*) *</tt></td>
-    <td class="left"> A <a href="#t_pointer">pointer</a> to a <a
-      href="#t_function">function</a> that takes an <tt>i32*</tt>, returning an
-      <tt>i32</tt>.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>i32 addrspace(5)*</tt></td>
-    <td class="left">A <a href="#t_pointer">pointer</a> to an <tt>i32</tt> value
-     that resides in address space #5.</td>
-  </tr>
-</table>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="t_vector">Vector Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>A vector type is a simple derived type that represents a vector of elements.
-   Vector types are used when multiple primitive data are operated in parallel
-   using a single instruction (SIMD).  A vector type requires a size (number of
-   elements) and an underlying primitive data type.  Vector types are considered
-   <a href="#t_firstclass">first class</a>.</p>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt; &lt;# elements&gt; x &lt;elementtype&gt; &gt;
-</pre>
-
-<p>The number of elements is a constant integer value larger than 0; elementtype
-   may be any integer or floating point type, or a pointer to these types.
-   Vectors of size zero are not allowed. </p>
-
-<h5>Examples:</h5>
-<table class="layout">
-  <tr class="layout">
-    <td class="left"><tt>&lt;4 x i32&gt;</tt></td>
-    <td class="left">Vector of 4 32-bit integer values.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>&lt;8 x float&gt;</tt></td>
-    <td class="left">Vector of 8 32-bit floating-point values.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>&lt;2 x i64&gt;</tt></td>
-    <td class="left">Vector of 2 64-bit integer values.</td>
-  </tr>
-  <tr class="layout">
-    <td class="left"><tt>&lt;4 x i64*&gt;</tt></td>
-    <td class="left">Vector of 4 pointers to 64-bit integer values.</td>
-  </tr>
-</table>
-
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="constants">Constants</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM has several different basic types of constants.  This section describes
-   them all and their syntax.</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="simpleconstants">Simple Constants</a>
-</h3>
-
-<div>
-
-<dl>
-  <dt><b>Boolean constants</b></dt>
-  <dd>The two strings '<tt>true</tt>' and '<tt>false</tt>' are both valid
-      constants of the <tt><a href="#t_integer">i1</a></tt> type.</dd>
-
-  <dt><b>Integer constants</b></dt>
-  <dd>Standard integers (such as '4') are constants of
-      the <a href="#t_integer">integer</a> type.  Negative numbers may be used
-      with integer types.</dd>
-
-  <dt><b>Floating point constants</b></dt>
-  <dd>Floating point constants use standard decimal notation (e.g. 123.421),
-      exponential notation (e.g. 1.23421e+2), or a more precise hexadecimal
-      notation (see below).  The assembler requires the exact decimal value of a
-      floating-point constant.  For example, the assembler accepts 1.25 but
-      rejects 1.3 because 1.3 is a repeating decimal in binary.  Floating point
-      constants must have a <a href="#t_floating">floating point</a> type. </dd>
-
-  <dt><b>Null pointer constants</b></dt>
-  <dd>The identifier '<tt>null</tt>' is recognized as a null pointer constant
-      and must be of <a href="#t_pointer">pointer type</a>.</dd>
-</dl>
-
-<p>The one non-intuitive notation for constants is the hexadecimal form of
-   floating point constants.  For example, the form '<tt>double
-   0x432ff973cafa8000</tt>' is equivalent to (but harder to read than)
-   '<tt>double 4.5e+15</tt>'.  The only time hexadecimal floating point
-   constants are required (and the only time that they are generated by the
-   disassembler) is when a floating point constant must be emitted but it cannot
-   be represented as a decimal floating point number in a reasonable number of
-   digits.  For example, NaN's, infinities, and other special values are
-   represented in their IEEE hexadecimal format so that assembly and disassembly
-   do not cause any bits to change in the constants.</p>
-
-<p>When using the hexadecimal form, constants of types half, float, and double are
-   represented using the 16-digit form shown above (which matches the IEEE754
-   representation for double); half and float values must, however, be exactly
-   representable as IEE754 half and single precision, respectively.
-   Hexadecimal format is always used
-   for long double, and there are three forms of long double.  The 80-bit format
-   used by x86 is represented as <tt>0xK</tt> followed by 20 hexadecimal digits.
-   The 128-bit format used by PowerPC (two adjacent doubles) is represented
-   by <tt>0xM</tt> followed by 32 hexadecimal digits.  The IEEE 128-bit format
-   is represented by <tt>0xL</tt> followed by 32 hexadecimal digits; no
-   currently supported target uses this format.  Long doubles will only work if
-   they match the long double format on your target. The IEEE 16-bit format
-   (half precision) is represented by <tt>0xH</tt> followed by 4 hexadecimal
-   digits. All hexadecimal formats are big-endian (sign bit at the left).</p>
-
-<p>There are no constants of type x86mmx.</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-<a name="aggregateconstants"></a> <!-- old anchor -->
-<a name="complexconstants">Complex Constants</a>
-</h3>
-
-<div>
-
-<p>Complex constants are a (potentially recursive) combination of simple
-   constants and smaller complex constants.</p>
-
-<dl>
-  <dt><b>Structure constants</b></dt>
-  <dd>Structure constants are represented with notation similar to structure
-      type definitions (a comma separated list of elements, surrounded by braces
-      (<tt>{}</tt>)).  For example: "<tt>{ i32 4, float 17.0, i32* @G }</tt>",
-      where "<tt>@G</tt>" is declared as "<tt>@G = external global i32</tt>".
-      Structure constants must have <a href="#t_struct">structure type</a>, and
-      the number and types of elements must match those specified by the
-      type.</dd>
-
-  <dt><b>Array constants</b></dt>
-  <dd>Array constants are represented with notation similar to array type
-     definitions (a comma separated list of elements, surrounded by square
-     brackets (<tt>[]</tt>)).  For example: "<tt>[ i32 42, i32 11, i32 74
-     ]</tt>".  Array constants must have <a href="#t_array">array type</a>, and
-     the number and types of elements must match those specified by the
-     type.</dd>
-
-  <dt><b>Vector constants</b></dt>
-  <dd>Vector constants are represented with notation similar to vector type
-      definitions (a comma separated list of elements, surrounded by
-      less-than/greater-than's (<tt>&lt;&gt;</tt>)).  For example: "<tt>&lt; i32
-      42, i32 11, i32 74, i32 100 &gt;</tt>".  Vector constants must
-      have <a href="#t_vector">vector type</a>, and the number and types of
-      elements must match those specified by the type.</dd>
-
-  <dt><b>Zero initialization</b></dt>
-  <dd>The string '<tt>zeroinitializer</tt>' can be used to zero initialize a
-      value to zero of <em>any</em> type, including scalar and
-      <a href="#t_aggregate">aggregate</a> types.
-      This is often used to avoid having to print large zero initializers
-      (e.g. for large arrays) and is always exactly equivalent to using explicit
-      zero initializers.</dd>
-
-  <dt><b>Metadata node</b></dt>
-  <dd>A metadata node is a structure-like constant with
-      <a href="#t_metadata">metadata type</a>.  For example: "<tt>metadata !{
-      i32 0, metadata !"test" }</tt>".  Unlike other constants that are meant to
-      be interpreted as part of the instruction stream, metadata is a place to
-      attach additional information such as debug info.</dd>
-</dl>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="globalconstants">Global Variable and Function Addresses</a>
-</h3>
-
-<div>
-
-<p>The addresses of <a href="#globalvars">global variables</a>
-   and <a href="#functionstructure">functions</a> are always implicitly valid
-   (link-time) constants.  These constants are explicitly referenced when
-   the <a href="#identifiers">identifier for the global</a> is used and always
-   have <a href="#t_pointer">pointer</a> type. For example, the following is a
-   legal LLVM file:</p>
-
-<pre class="doc_code">
-@X = global i32 17
-@Y = global i32 42
-@Z = global [2 x i32*] [ i32* @X, i32* @Y ]
-</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="undefvalues">Undefined Values</a>
-</h3>
-
-<div>
-
-<p>The string '<tt>undef</tt>' can be used anywhere a constant is expected, and
-   indicates that the user of the value may receive an unspecified bit-pattern.
-   Undefined values may be of any type (other than '<tt>label</tt>'
-   or '<tt>void</tt>') and be used anywhere a constant is permitted.</p>
-
-<p>Undefined values are useful because they indicate to the compiler that the
-   program is well defined no matter what value is used.  This gives the
-   compiler more freedom to optimize.  Here are some examples of (potentially
-   surprising) transformations that are valid (in pseudo IR):</p>
-
-
-<pre class="doc_code">
-  %A = add %X, undef
-  %B = sub %X, undef
-  %C = xor %X, undef
-Safe:
-  %A = undef
-  %B = undef
-  %C = undef
-</pre>
-
-<p>This is safe because all of the output bits are affected by the undef bits.
-   Any output bit can have a zero or one depending on the input bits.</p>
-
-<pre class="doc_code">
-  %A = or %X, undef
-  %B = and %X, undef
-Safe:
-  %A = -1
-  %B = 0
-Unsafe:
-  %A = undef
-  %B = undef
-</pre>
-
-<p>These logical operations have bits that are not always affected by the input.
-   For example, if <tt>%X</tt> has a zero bit, then the output of the
-   '<tt>and</tt>' operation will always be a zero for that bit, no matter what
-   the corresponding bit from the '<tt>undef</tt>' is. As such, it is unsafe to
-   optimize or assume that the result of the '<tt>and</tt>' is '<tt>undef</tt>'.
-   However, it is safe to assume that all bits of the '<tt>undef</tt>' could be
-   0, and optimize the '<tt>and</tt>' to 0. Likewise, it is safe to assume that
-   all the bits of the '<tt>undef</tt>' operand to the '<tt>or</tt>' could be
-   set, allowing the '<tt>or</tt>' to be folded to -1.</p>
-
-<pre class="doc_code">
-  %A = select undef, %X, %Y
-  %B = select undef, 42, %Y
-  %C = select %X, %Y, undef
-Safe:
-  %A = %X     (or %Y)
-  %B = 42     (or %Y)
-  %C = %Y
-Unsafe:
-  %A = undef
-  %B = undef
-  %C = undef
-</pre>
-
-<p>This set of examples shows that undefined '<tt>select</tt>' (and conditional
-   branch) conditions can go <em>either way</em>, but they have to come from one
-   of the two operands.  In the <tt>%A</tt> example, if <tt>%X</tt> and
-   <tt>%Y</tt> were both known to have a clear low bit, then <tt>%A</tt> would
-   have to have a cleared low bit. However, in the <tt>%C</tt> example, the
-   optimizer is allowed to assume that the '<tt>undef</tt>' operand could be the
-   same as <tt>%Y</tt>, allowing the whole '<tt>select</tt>' to be
-   eliminated.</p>
-
-<pre class="doc_code">
-  %A = xor undef, undef
-
-  %B = undef
-  %C = xor %B, %B
-
-  %D = undef
-  %E = icmp lt %D, 4
-  %F = icmp gte %D, 4
-
-Safe:
-  %A = undef
-  %B = undef
-  %C = undef
-  %D = undef
-  %E = undef
-  %F = undef
-</pre>
-
-<p>This example points out that two '<tt>undef</tt>' operands are not
-   necessarily the same. This can be surprising to people (and also matches C
-   semantics) where they assume that "<tt>X^X</tt>" is always zero, even
-   if <tt>X</tt> is undefined. This isn't true for a number of reasons, but the
-   short answer is that an '<tt>undef</tt>' "variable" can arbitrarily change
-   its value over its "live range".  This is true because the variable doesn't
-   actually <em>have a live range</em>. Instead, the value is logically read
-   from arbitrary registers that happen to be around when needed, so the value
-   is not necessarily consistent over time. In fact, <tt>%A</tt> and <tt>%C</tt>
-   need to have the same semantics or the core LLVM "replace all uses with"
-   concept would not hold.</p>
-
-<pre class="doc_code">
-  %A = fdiv undef, %X
-  %B = fdiv %X, undef
-Safe:
-  %A = undef
-b: unreachable
-</pre>
-
-<p>These examples show the crucial difference between an <em>undefined
-  value</em> and <em>undefined behavior</em>. An undefined value (like
-  '<tt>undef</tt>') is allowed to have an arbitrary bit-pattern. This means that
-  the <tt>%A</tt> operation can be constant folded to '<tt>undef</tt>', because
-  the '<tt>undef</tt>' could be an SNaN, and <tt>fdiv</tt> is not (currently)
-  defined on SNaN's. However, in the second example, we can make a more
-  aggressive assumption: because the <tt>undef</tt> is allowed to be an
-  arbitrary value, we are allowed to assume that it could be zero. Since a
-  divide by zero has <em>undefined behavior</em>, we are allowed to assume that
-  the operation does not execute at all. This allows us to delete the divide and
-  all code after it. Because the undefined operation "can't happen", the
-  optimizer can assume that it occurs in dead code.</p>
-
-<pre class="doc_code">
-a:  store undef -> %X
-b:  store %X -> undef
-Safe:
-a: &lt;deleted&gt;
-b: unreachable
-</pre>
-
-<p>These examples reiterate the <tt>fdiv</tt> example: a store <em>of</em> an
-   undefined value can be assumed to not have any effect; we can assume that the
-   value is overwritten with bits that happen to match what was already there.
-   However, a store <em>to</em> an undefined location could clobber arbitrary
-   memory, therefore, it has undefined behavior.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="poisonvalues">Poison Values</a>
-</h3>
-
-<div>
-
-<p>Poison values are similar to <a href="#undefvalues">undef values</a>, however
-   they also represent the fact that an instruction or constant expression which
-   cannot evoke side effects has nevertheless detected a condition which results
-   in undefined behavior.</p>
-
-<p>There is currently no way of representing a poison value in the IR; they
-   only exist when produced by operations such as
-   <a href="#i_add"><tt>add</tt></a> with the <tt>nsw</tt> flag.</p>
-
-<p>Poison value behavior is defined in terms of value <i>dependence</i>:</p>
-
-<ul>
-<li>Values other than <a href="#i_phi"><tt>phi</tt></a> nodes depend on
-    their operands.</li>
-
-<li><a href="#i_phi"><tt>Phi</tt></a> nodes depend on the operand corresponding
-    to their dynamic predecessor basic block.</li>
-
-<li>Function arguments depend on the corresponding actual argument values in
-    the dynamic callers of their functions.</li>
-
-<li><a href="#i_call"><tt>Call</tt></a> instructions depend on the
-    <a href="#i_ret"><tt>ret</tt></a> instructions that dynamically transfer
-    control back to them.</li>
-
-<li><a href="#i_invoke"><tt>Invoke</tt></a> instructions depend on the
-    <a href="#i_ret"><tt>ret</tt></a>, <a href="#i_resume"><tt>resume</tt></a>,
-    or exception-throwing call instructions that dynamically transfer control
-    back to them.</li>
-
-<li>Non-volatile loads and stores depend on the most recent stores to all of the
-    referenced memory addresses, following the order in the IR
-    (including loads and stores implied by intrinsics such as
-    <a href="#int_memcpy"><tt>@llvm.memcpy</tt></a>.)</li>
-
-<!-- TODO: In the case of multiple threads, this only applies if the store
-     "happens-before" the load or store. -->
-
-<!-- TODO: floating-point exception state -->
-
-<li>An instruction with externally visible side effects depends on the most
-    recent preceding instruction with externally visible side effects, following
-    the order in the IR. (This includes
-    <a href="#volatile">volatile operations</a>.)</li>
-
-<li>An instruction <i>control-depends</i> on a
-    <a href="#terminators">terminator instruction</a>
-    if the terminator instruction has multiple successors and the instruction
-    is always executed when control transfers to one of the successors, and
-    may not be executed when control is transferred to another.</li>
-
-<li>Additionally, an instruction also <i>control-depends</i> on a terminator
-    instruction if the set of instructions it otherwise depends on would be
-    different if the terminator had transferred control to a different
-    successor.</li>
-
-<li>Dependence is transitive.</li>
-
-</ul>
-
-<p>Poison Values have the same behavior as <a href="#undefvalues">undef values</a>,
-   with the additional affect that any instruction which has a <i>dependence</i>
-   on a poison value has undefined behavior.</p>
-
-<p>Here are some examples:</p>
-
-<pre class="doc_code">
-entry:
-  %poison = sub nuw i32 0, 1           ; Results in a poison value.
-  %still_poison = and i32 %poison, 0   ; 0, but also poison.
-  %poison_yet_again = getelementptr i32* @h, i32 %still_poison
-  store i32 0, i32* %poison_yet_again  ; memory at @h[0] is poisoned
-
-  store i32 %poison, i32* @g           ; Poison value stored to memory.
-  %poison2 = load i32* @g              ; Poison value loaded back from memory.
-
-  store volatile i32 %poison, i32* @g  ; External observation; undefined behavior.
-
-  %narrowaddr = bitcast i32* @g to i16*
-  %wideaddr = bitcast i32* @g to i64*
-  %poison3 = load i16* %narrowaddr     ; Returns a poison value.
-  %poison4 = load i64* %wideaddr       ; Returns a poison value.
-
-  %cmp = icmp slt i32 %poison, 0       ; Returns a poison value.
-  br i1 %cmp, label %true, label %end  ; Branch to either destination.
-
-true:
-  store volatile i32 0, i32* @g        ; This is control-dependent on %cmp, so
-                                       ; it has undefined behavior.
-  br label %end
-
-end:
-  %p = phi i32 [ 0, %entry ], [ 1, %true ]
-                                       ; Both edges into this PHI are
-                                       ; control-dependent on %cmp, so this
-                                       ; always results in a poison value.
-
-  store volatile i32 0, i32* @g        ; This would depend on the store in %true
-                                       ; if %cmp is true, or the store in %entry
-                                       ; otherwise, so this is undefined behavior.
-
-  br i1 %cmp, label %second_true, label %second_end
-                                       ; The same branch again, but this time the
-                                       ; true block doesn't have side effects.
-
-second_true:
-  ; No side effects!
-  ret void
-
-second_end:
-  store volatile i32 0, i32* @g        ; This time, the instruction always depends
-                                       ; on the store in %end. Also, it is
-                                       ; control-equivalent to %end, so this is
-                                       ; well-defined (ignoring earlier undefined
-                                       ; behavior in this example).
-</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="blockaddress">Addresses of Basic Blocks</a>
-</h3>
-
-<div>
-
-<p><b><tt>blockaddress(@function, %block)</tt></b></p>
-
-<p>The '<tt>blockaddress</tt>' constant computes the address of the specified
-   basic block in the specified function, and always has an i8* type.  Taking
-   the address of the entry block is illegal.</p>
-
-<p>This value only has defined behavior when used as an operand to the
-   '<a href="#i_indirectbr"><tt>indirectbr</tt></a>' instruction, or for
-   comparisons against null. Pointer equality tests between labels addresses
-   results in undefined behavior &mdash; though, again, comparison against null
-   is ok, and no label is equal to the null pointer. This may be passed around
-   as an opaque pointer sized value as long as the bits are not inspected. This
-   allows <tt>ptrtoint</tt> and arithmetic to be performed on these values so
-   long as the original value is reconstituted before the <tt>indirectbr</tt>
-   instruction.</p>
-
-<p>Finally, some targets may provide defined semantics when using the value as
-   the operand to an inline assembly, but that is target specific.</p>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="constantexprs">Constant Expressions</a>
-</h3>
-
-<div>
-
-<p>Constant expressions are used to allow expressions involving other constants
-   to be used as constants.  Constant expressions may be of
-   any <a href="#t_firstclass">first class</a> type and may involve any LLVM
-   operation that does not have side effects (e.g. load and call are not
-   supported). The following is the syntax for constant expressions:</p>
-
-<dl>
-  <dt><b><tt>trunc (CST to TYPE)</tt></b></dt>
-  <dd>Truncate a constant to another type. The bit size of CST must be larger
-      than the bit size of TYPE. Both types must be integers.</dd>
-
-  <dt><b><tt>zext (CST to TYPE)</tt></b></dt>
-  <dd>Zero extend a constant to another type. The bit size of CST must be
-      smaller than the bit size of TYPE.  Both types must be integers.</dd>
-
-  <dt><b><tt>sext (CST to TYPE)</tt></b></dt>
-  <dd>Sign extend a constant to another type. The bit size of CST must be
-      smaller than the bit size of TYPE.  Both types must be integers.</dd>
-
-  <dt><b><tt>fptrunc (CST to TYPE)</tt></b></dt>
-  <dd>Truncate a floating point constant to another floating point type. The
-      size of CST must be larger than the size of TYPE. Both types must be
-      floating point.</dd>
-
-  <dt><b><tt>fpext (CST to TYPE)</tt></b></dt>
-  <dd>Floating point extend a constant to another type. The size of CST must be
-      smaller or equal to the size of TYPE. Both types must be floating
-      point.</dd>
-
-  <dt><b><tt>fptoui (CST to TYPE)</tt></b></dt>
-  <dd>Convert a floating point constant to the corresponding unsigned integer
-      constant. TYPE must be a scalar or vector integer type. CST must be of
-      scalar or vector floating point type. Both CST and TYPE must be scalars,
-      or vectors of the same number of elements. If the value won't fit in the
-      integer type, the results are undefined.</dd>
-
-  <dt><b><tt>fptosi (CST to TYPE)</tt></b></dt>
-  <dd>Convert a floating point constant to the corresponding signed integer
-      constant.  TYPE must be a scalar or vector integer type. CST must be of
-      scalar or vector floating point type. Both CST and TYPE must be scalars,
-      or vectors of the same number of elements. If the value won't fit in the
-      integer type, the results are undefined.</dd>
-
-  <dt><b><tt>uitofp (CST to TYPE)</tt></b></dt>
-  <dd>Convert an unsigned integer constant to the corresponding floating point
-      constant. TYPE must be a scalar or vector floating point type. CST must be
-      of scalar or vector integer type. Both CST and TYPE must be scalars, or
-      vectors of the same number of elements. If the value won't fit in the
-      floating point type, the results are undefined.</dd>
-
-  <dt><b><tt>sitofp (CST to TYPE)</tt></b></dt>
-  <dd>Convert a signed integer constant to the corresponding floating point
-      constant. TYPE must be a scalar or vector floating point type. CST must be
-      of scalar or vector integer type. Both CST and TYPE must be scalars, or
-      vectors of the same number of elements. If the value won't fit in the
-      floating point type, the results are undefined.</dd>
-
-  <dt><b><tt>ptrtoint (CST to TYPE)</tt></b></dt>
-  <dd>Convert a pointer typed constant to the corresponding integer constant
-      <tt>TYPE</tt> must be an integer type. <tt>CST</tt> must be of pointer
-      type. The <tt>CST</tt> value is zero extended, truncated, or unchanged to
-      make it fit in <tt>TYPE</tt>.</dd>
-
-  <dt><b><tt>inttoptr (CST to TYPE)</tt></b></dt>
-  <dd>Convert an integer constant to a pointer constant.  TYPE must be a pointer
-      type.  CST must be of integer type. The CST value is zero extended,
-      truncated, or unchanged to make it fit in a pointer size. This one is
-      <i>really</i> dangerous!</dd>
-
-  <dt><b><tt>bitcast (CST to TYPE)</tt></b></dt>
-  <dd>Convert a constant, CST, to another TYPE. The constraints of the operands
-      are the same as those for the <a href="#i_bitcast">bitcast
-      instruction</a>.</dd>
-
-  <dt><b><tt>getelementptr (CSTPTR, IDX0, IDX1, ...)</tt></b></dt>
-  <dt><b><tt>getelementptr inbounds (CSTPTR, IDX0, IDX1, ...)</tt></b></dt>
-  <dd>Perform the <a href="#i_getelementptr">getelementptr operation</a> on
-      constants.  As with the <a href="#i_getelementptr">getelementptr</a>
-      instruction, the index list may have zero or more indexes, which are
-      required to make sense for the type of "CSTPTR".</dd>
-
-  <dt><b><tt>select (COND, VAL1, VAL2)</tt></b></dt>
-  <dd>Perform the <a href="#i_select">select operation</a> on constants.</dd>
-
-  <dt><b><tt>icmp COND (VAL1, VAL2)</tt></b></dt>
-  <dd>Performs the <a href="#i_icmp">icmp operation</a> on constants.</dd>
-
-  <dt><b><tt>fcmp COND (VAL1, VAL2)</tt></b></dt>
-  <dd>Performs the <a href="#i_fcmp">fcmp operation</a> on constants.</dd>
-
-  <dt><b><tt>extractelement (VAL, IDX)</tt></b></dt>
-  <dd>Perform the <a href="#i_extractelement">extractelement operation</a> on
-      constants.</dd>
-
-  <dt><b><tt>insertelement (VAL, ELT, IDX)</tt></b></dt>
-  <dd>Perform the <a href="#i_insertelement">insertelement operation</a> on
-    constants.</dd>
-
-  <dt><b><tt>shufflevector (VEC1, VEC2, IDXMASK)</tt></b></dt>
-  <dd>Perform the <a href="#i_shufflevector">shufflevector operation</a> on
-      constants.</dd>
-
-  <dt><b><tt>extractvalue (VAL, IDX0, IDX1, ...)</tt></b></dt>
-  <dd>Perform the <a href="#i_extractvalue">extractvalue operation</a> on
-    constants. The index list is interpreted in a similar manner as indices in
-    a '<a href="#i_getelementptr">getelementptr</a>' operation. At least one
-    index value must be specified.</dd>
-
-  <dt><b><tt>insertvalue (VAL, ELT, IDX0, IDX1, ...)</tt></b></dt>
-  <dd>Perform the <a href="#i_insertvalue">insertvalue operation</a> on
-    constants. The index list is interpreted in a similar manner as indices in
-    a '<a href="#i_getelementptr">getelementptr</a>' operation. At least one
-    index value must be specified.</dd>
-
-  <dt><b><tt>OPCODE (LHS, RHS)</tt></b></dt>
-  <dd>Perform the specified operation of the LHS and RHS constants. OPCODE may
-      be any of the <a href="#binaryops">binary</a>
-      or <a href="#bitwiseops">bitwise binary</a> operations.  The constraints
-      on operands are the same as those for the corresponding instruction
-      (e.g. no bitwise operations on floating point values are allowed).</dd>
-</dl>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="othervalues">Other Values</a></h2>
-<!-- *********************************************************************** -->
-<div>
-<!-- ======================================================================= -->
-<h3>
-<a name="inlineasm">Inline Assembler Expressions</a>
-</h3>
-
-<div>
-
-<p>LLVM supports inline assembler expressions (as opposed
-   to <a href="#moduleasm">Module-Level Inline Assembly</a>) through the use of
-   a special value.  This value represents the inline assembler as a string
-   (containing the instructions to emit), a list of operand constraints (stored
-   as a string), a flag that indicates whether or not the inline asm
-   expression has side effects, and a flag indicating whether the function
-   containing the asm needs to align its stack conservatively.  An example
-   inline assembler expression is:</p>
-
-<pre class="doc_code">
-i32 (i32) asm "bswap $0", "=r,r"
-</pre>
-
-<p>Inline assembler expressions may <b>only</b> be used as the callee operand of
-   a <a href="#i_call"><tt>call</tt></a> or an
-   <a href="#i_invoke"><tt>invoke</tt></a> instruction.
-   Thus, typically we have:</p>
-
-<pre class="doc_code">
-%X = call i32 asm "<a href="#int_bswap">bswap</a> $0", "=r,r"(i32 %Y)
-</pre>
-
-<p>Inline asms with side effects not visible in the constraint list must be
-   marked as having side effects.  This is done through the use of the
-   '<tt>sideeffect</tt>' keyword, like so:</p>
-
-<pre class="doc_code">
-call void asm sideeffect "eieio", ""()
-</pre>
-
-<p>In some cases inline asms will contain code that will not work unless the
-   stack is aligned in some way, such as calls or SSE instructions on x86,
-   yet will not contain code that does that alignment within the asm.
-   The compiler should make conservative assumptions about what the asm might
-   contain and should generate its usual stack alignment code in the prologue
-   if the '<tt>alignstack</tt>' keyword is present:</p>
-
-<pre class="doc_code">
-call void asm alignstack "eieio", ""()
-</pre>
-
-<p>Inline asms also support using non-standard assembly dialects.  The assumed
-   dialect is ATT.  When the '<tt>inteldialect</tt>' keyword is present, the
-   inline asm is using the Intel dialect.  Currently, ATT and Intel are the
-   only supported dialects.  An example is:</p>
-
-<pre class="doc_code">
-call void asm inteldialect "eieio", ""()
-</pre>
-
-<p>If multiple keywords appear the '<tt>sideeffect</tt>' keyword must come
-   first, the '<tt>alignstack</tt>' keyword second and the
-   '<tt>inteldialect</tt>' keyword last.</p>
-
-<!--
-<p>TODO: The format of the asm and constraints string still need to be
-   documented here.  Constraints on what can be done (e.g. duplication, moving,
-   etc need to be documented).  This is probably best done by reference to
-   another document that covers inline asm from a holistic perspective.</p>
-  -->
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="inlineasm_md">Inline Asm Metadata</a>
-</h4>
-
-<div>
-
-<p>The call instructions that wrap inline asm nodes may have a
-   "<tt>!srcloc</tt>" MDNode attached to it that contains a list of constant
-   integers.  If present, the code generator will use the integer as the
-   location cookie value when report errors through the <tt>LLVMContext</tt>
-   error reporting mechanisms.  This allows a front-end to correlate backend
-   errors that occur with inline asm back to the source code that produced it.
-   For example:</p>
-
-<pre class="doc_code">
-call void asm sideeffect "something bad", ""()<b>, !srcloc !42</b>
-...
-!42 = !{ i32 1234567 }
-</pre>
-
-<p>It is up to the front-end to make sense of the magic numbers it places in the
-   IR. If the MDNode contains multiple constants, the code generator will use
-   the one that corresponds to the line of the asm that the error occurs on.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="metadata">Metadata Nodes and Metadata Strings</a>
-</h3>
-
-<div>
-
-<p>LLVM IR allows metadata to be attached to instructions in the program that
-   can convey extra information about the code to the optimizers and code
-   generator.  One example application of metadata is source-level debug
-   information.  There are two metadata primitives: strings and nodes. All
-   metadata has the <tt>metadata</tt> type and is identified in syntax by a
-   preceding exclamation point ('<tt>!</tt>').</p>
-
-<p>A metadata string is a string surrounded by double quotes.  It can contain
-   any character by escaping non-printable characters with "<tt>\xx</tt>" where
-   "<tt>xx</tt>" is the two digit hex code.  For example:
-   "<tt>!"test\00"</tt>".</p>
-
-<p>Metadata nodes are represented with notation similar to structure constants
-   (a comma separated list of elements, surrounded by braces and preceded by an
-   exclamation point). Metadata nodes can have any values as their operand. For
-   example:</p>
-
-<div class="doc_code">
-<pre>
-!{ metadata !"test\00", i32 10}
-</pre>
-</div>
-
-<p>A <a href="#namedmetadatastructure">named metadata</a> is a collection of
-   metadata nodes, which can be looked up in the module symbol table. For
-   example:</p>
-
-<div class="doc_code">
-<pre>
-!foo =  metadata !{!4, !3}
-</pre>
-</div>
-
-<p>Metadata can be used as function arguments. Here <tt>llvm.dbg.value</tt>
-   function is using two metadata arguments:</p>
-
-<div class="doc_code">
-<pre>
-call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
-</pre>
-</div>
-
-<p>Metadata can be attached with an instruction. Here metadata <tt>!21</tt> is
-   attached to the <tt>add</tt> instruction using the <tt>!dbg</tt>
-   identifier:</p>
-
-<div class="doc_code">
-<pre>
-%indvar.next = add i64 %indvar, 1, !dbg !21
-</pre>
-</div>
-
-<p>More information about specific metadata nodes recognized by the optimizers
-   and code generator is found below.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="tbaa">'<tt>tbaa</tt>' Metadata</a>
-</h4>
-
-<div>
-
-<p>In LLVM IR, memory does not have types, so LLVM's own type system is not
-   suitable for doing TBAA. Instead, metadata is added to the IR to describe
-   a type system of a higher level language. This can be used to implement
-   typical C/C++ TBAA, but it can also be used to implement custom alias
-   analysis behavior for other languages.</p>
-
-<p>The current metadata format is very simple. TBAA metadata nodes have up to
-   three fields, e.g.:</p>
-
-<div class="doc_code">
-<pre>
-!0 = metadata !{ metadata !"an example type tree" }
-!1 = metadata !{ metadata !"int", metadata !0 }
-!2 = metadata !{ metadata !"float", metadata !0 }
-!3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
-</pre>
-</div>
-
-<p>The first field is an identity field. It can be any value, usually
-   a metadata string, which uniquely identifies the type. The most important
-   name in the tree is the name of the root node. Two trees with
-   different root node names are entirely disjoint, even if they
-   have leaves with common names.</p>
-
-<p>The second field identifies the type's parent node in the tree, or
-   is null or omitted for a root node. A type is considered to alias
-   all of its descendants and all of its ancestors in the tree. Also,
-   a type is considered to alias all types in other trees, so that
-   bitcode produced from multiple front-ends is handled conservatively.</p>
-
-<p>If the third field is present, it's an integer which if equal to 1
-   indicates that the type is "constant" (meaning
-   <tt>pointsToConstantMemory</tt> should return true; see
-   <a href="AliasAnalysis.html#OtherItfs">other useful
-   <tt>AliasAnalysis</tt> methods</a>).</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="tbaa.struct">'<tt>tbaa.struct</tt>' Metadata</a>
-</h4>
-
-<div>
-
-<p>The <a href="#int_memcpy"><tt>llvm.memcpy</tt></a> is often used to implement
-aggregate assignment operations in C and similar languages, however it is
-defined to copy a contiguous region of memory, which is more than strictly
-necessary for aggregate types which contain holes due to padding. Also, it
-doesn't contain any TBAA information about the fields of the aggregate.</p>
-
-<p><tt>!tbaa.struct</tt> metadata can describe which memory subregions in a memcpy
-are padding and what the TBAA tags of the struct are.</p>
-
-<p>The current metadata format is very simple. <tt>!tbaa.struct</tt> metadata nodes
-   are a list of operands which are in conceptual groups of three. For each
-   group of three, the first operand gives the byte offset of a field in bytes,
-   the second gives its size in bytes, and the third gives its
-   tbaa tag. e.g.:</p>
-
-<div class="doc_code">
-<pre>
-!4 = metadata !{ i64 0, i64 4, metadata !1, i64 8, i64 4, metadata !2 }
-</pre>
-</div>
-
-<p>This describes a struct with two fields. The first is at offset 0 bytes
-   with size 4 bytes, and has tbaa tag !1. The second is at offset 8 bytes
-   and has size 4 bytes and has tbaa tag !2.</p>
-
-<p>Note that the fields need not be contiguous. In this example, there is a
-   4 byte gap between the two fields. This gap represents padding which
-   does not carry useful data and need not be preserved.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="fpmath">'<tt>fpmath</tt>' Metadata</a>
-</h4>
-
-<div>
-
-<p><tt>fpmath</tt> metadata may be attached to any instruction of floating point
-  type.  It can be used to express the maximum acceptable error in the result of
-  that instruction, in ULPs, thus potentially allowing the compiler to use a
-  more efficient but less accurate method of computing it.  ULP is defined as
-  follows:</p>
-
-<blockquote>
-
-<p>If <tt>x</tt> is a real number that lies between two finite consecutive
-   floating-point numbers <tt>a</tt> and <tt>b</tt>, without being equal to one
-   of them, then <tt>ulp(x) = |b - a|</tt>, otherwise <tt>ulp(x)</tt> is the
-   distance between the two non-equal finite floating-point numbers nearest
-   <tt>x</tt>. Moreover, <tt>ulp(NaN)</tt> is <tt>NaN</tt>.</p>
-
-</blockquote>
-
-<p>The metadata node shall consist of a single positive floating point number
-   representing the maximum relative error, for example:</p>
-
-<div class="doc_code">
-<pre>
-!0 = metadata !{ float 2.5 } ; maximum acceptable inaccuracy is 2.5 ULPs
-</pre>
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="range">'<tt>range</tt>' Metadata</a>
-</h4>
-
-<div>
-<p><tt>range</tt> metadata may be attached only to loads of integer types. It
-   expresses the possible ranges the loaded value is in. The ranges are
-   represented with a flattened list of integers. The loaded value is known to
-   be in the union of the ranges defined by each consecutive pair. Each pair
-   has the following properties:</p>
-<ul>
-   <li>The type must match the type loaded by the instruction.</li>
-   <li>The pair <tt>a,b</tt> represents the range <tt>[a,b)</tt>.</li>
-   <li>Both <tt>a</tt> and <tt>b</tt> are constants.</li>
-   <li>The range is allowed to wrap.</li>
-   <li>The range should not represent the full or empty set. That is,
-       <tt>a!=b</tt>. </li>
-</ul>
-<p> In addition, the pairs must be in signed order of the lower bound and
-  they must be non-contiguous.</p>
-
-<p>Examples:</p>
-<div class="doc_code">
-<pre>
-  %a = load i8* %x, align 1, !range !0 ; Can only be 0 or 1
-  %b = load i8* %y, align 1, !range !1 ; Can only be 255 (-1), 0 or 1
-  %c = load i8* %z, align 1, !range !2 ; Can only be 0, 1, 3, 4 or 5
-  %d = load i8* %z, align 1, !range !3 ; Can only be -2, -1, 3, 4 or 5
-...
-!0 = metadata !{ i8 0, i8 2 }
-!1 = metadata !{ i8 255, i8 2 }
-!2 = metadata !{ i8 0, i8 2, i8 3, i8 6 }
-!3 = metadata !{ i8 -2, i8 0, i8 3, i8 6 }
-</pre>
-</div>
-</div>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="module_flags">Module Flags Metadata</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Information about the module as a whole is difficult to convey to LLVM's
-   subsystems. The LLVM IR isn't sufficient to transmit this
-   information. The <tt>llvm.module.flags</tt> named metadata exists in order to
-   facilitate this. These flags are in the form of key / value pairs &mdash;
-   much like a dictionary &mdash; making it easy for any subsystem who cares
-   about a flag to look it up.</p>
-
-<p>The <tt>llvm.module.flags</tt> metadata contains a list of metadata
-   triplets. Each triplet has the following form:</p>
-
-<ul>
-  <li>The first element is a <i>behavior</i> flag, which specifies the behavior
-      when two (or more) modules are merged together, and it encounters two (or
-      more) metadata with the same ID. The supported behaviors are described
-      below.</li>
-
-  <li>The second element is a metadata string that is a unique ID for the
-      metadata. How each ID is interpreted is documented below.</li>
-
-  <li>The third element is the value of the flag.</li>
-</ul>
-
-<p>When two (or more) modules are merged together, the resulting
-   <tt>llvm.module.flags</tt> metadata is the union of the
-   modules' <tt>llvm.module.flags</tt> metadata. The only exception being a flag
-   with the <i>Override</i> behavior, which may override another flag's value
-   (see below).</p>
-
-<p>The following behaviors are supported:</p>
-
-<table border="1" cellspacing="0" cellpadding="4">
-  <tbody>
-    <tr>
-      <th>Value</th>
-      <th>Behavior</th>
-    </tr>
-    <tr>
-      <td>1</td>
-      <td align="left">
-        <dl>
-          <dt><b>Error</b></dt>
-          <dd>Emits an error if two values disagree. It is an error to have an ID
-              with both an Error and a Warning behavior.</dd>
-        </dl>
-      </td>
-    </tr>
-    <tr>
-      <td>2</td>
-      <td align="left">
-        <dl>
-          <dt><b>Warning</b></dt>
-          <dd>Emits a warning if two values disagree.</dd>
-        </dl>
-      </td>
-    </tr>
-    <tr>
-      <td>3</td>
-      <td align="left">
-        <dl>
-          <dt><b>Require</b></dt>
-          <dd>Emits an error when the specified value is not present or doesn't
-              have the specified value. It is an error for two (or more)
-              <tt>llvm.module.flags</tt> with the same ID to have the Require
-              behavior but different values. There may be multiple Require flags
-              per ID.</dd>
-        </dl>
-      </td>
-    </tr>
-    <tr>
-      <td>4</td>
-      <td align="left">
-        <dl>
-          <dt><b>Override</b></dt>
-          <dd>Uses the specified value if the two values disagree. It is an
-              error for two (or more) <tt>llvm.module.flags</tt> with the same
-              ID to have the Override behavior but different values.</dd>
-        </dl>
-      </td>
-    </tr>
-  </tbody>
-</table>
-
-<p>An example of module flags:</p>
-
-<pre class="doc_code">
-!0 = metadata !{ i32 1, metadata !"foo", i32 1 }
-!1 = metadata !{ i32 4, metadata !"bar", i32 37 }
-!2 = metadata !{ i32 2, metadata !"qux", i32 42 }
-!3 = metadata !{ i32 3, metadata !"qux",
-  metadata !{
-    metadata !"foo", i32 1
-  }
-}
-!llvm.module.flags = !{ !0, !1, !2, !3 }
-</pre>
-
-<ul>
-  <li><p>Metadata <tt>!0</tt> has the ID <tt>!"foo"</tt> and the value '1'. The
-         behavior if two or more <tt>!"foo"</tt> flags are seen is to emit an
-         error if their values are not equal.</p></li>
-
-  <li><p>Metadata <tt>!1</tt> has the ID <tt>!"bar"</tt> and the value '37'. The
-         behavior if two or more <tt>!"bar"</tt> flags are seen is to use the
-         value '37' if their values are not equal.</p></li>
-
-  <li><p>Metadata <tt>!2</tt> has the ID <tt>!"qux"</tt> and the value '42'. The
-         behavior if two or more <tt>!"qux"</tt> flags are seen is to emit a
-         warning if their values are not equal.</p></li>
-
-  <li><p>Metadata <tt>!3</tt> has the ID <tt>!"qux"</tt> and the value:</p>
-
-<pre class="doc_code">
-metadata !{ metadata !"foo", i32 1 }
-</pre>
-
-      <p>The behavior is to emit an error if the <tt>llvm.module.flags</tt> does
-         not contain a flag with the ID <tt>!"foo"</tt> that has the value
-         '1'. If two or more <tt>!"qux"</tt> flags exist, then they must have
-         the same value or an error will be issued.</p></li>
-</ul>
-
-
-<!-- ======================================================================= -->
-<h3>
-<a name="objc_gc_flags">Objective-C Garbage Collection Module Flags Metadata</a>
-</h3>
-
-<div>
-
-<p>On the Mach-O platform, Objective-C stores metadata about garbage collection
-   in a special section called "image info". The metadata consists of a version
-   number and a bitmask specifying what types of garbage collection are
-   supported (if any) by the file. If two or more modules are linked together
-   their garbage collection metadata needs to be merged rather than appended
-   together.</p>
-
-<p>The Objective-C garbage collection module flags metadata consists of the
-   following key-value pairs:</p>
-
-<table border="1" cellspacing="0" cellpadding="4">
-  <col width="30%">
-  <tbody>
-    <tr>
-      <th>Key</th>
-      <th>Value</th>
-    </tr>
-    <tr>
-      <td><tt>Objective-C&nbsp;Version</tt></td>
-      <td align="left"><b>[Required]</b> &mdash; The Objective-C ABI
-         version. Valid values are 1 and 2.</td>
-    </tr>
-    <tr>
-      <td><tt>Objective-C&nbsp;Image&nbsp;Info&nbsp;Version</tt></td>
-      <td align="left"><b>[Required]</b> &mdash; The version of the image info
-         section. Currently always 0.</td>
-    </tr>
-    <tr>
-      <td><tt>Objective-C&nbsp;Image&nbsp;Info&nbsp;Section</tt></td>
-      <td align="left"><b>[Required]</b> &mdash; The section to place the
-         metadata. Valid values are <tt>"__OBJC, __image_info, regular"</tt> for
-         Objective-C ABI version 1, and <tt>"__DATA,__objc_imageinfo, regular,
-         no_dead_strip"</tt> for Objective-C ABI version 2.</td>
-    </tr>
-    <tr>
-      <td><tt>Objective-C&nbsp;Garbage&nbsp;Collection</tt></td>
-      <td align="left"><b>[Required]</b> &mdash; Specifies whether garbage
-          collection is supported or not. Valid values are 0, for no garbage
-          collection, and 2, for garbage collection supported.</td>
-    </tr>
-    <tr>
-      <td><tt>Objective-C&nbsp;GC&nbsp;Only</tt></td>
-      <td align="left"><b>[Optional]</b> &mdash; Specifies that only garbage
-         collection is supported. If present, its value must be 6. This flag
-         requires that the <tt>Objective-C Garbage Collection</tt> flag have the
-         value 2.</td>
-    </tr>
-  </tbody>
-</table>
-
-<p>Some important flag interactions:</p>
-
-<ul>
-  <li>If a module with <tt>Objective-C Garbage Collection</tt> set to 0 is
-      merged with a module with <tt>Objective-C Garbage Collection</tt> set to
-      2, then the resulting module has the <tt>Objective-C Garbage
-      Collection</tt> flag set to 0.</li>
-
-  <li>A module with <tt>Objective-C Garbage Collection</tt> set to 0 cannot be
-      merged with a module with <tt>Objective-C GC Only</tt> set to 6.</li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="intrinsic_globals">Intrinsic Global Variables</a>
-</h2>
-<!-- *********************************************************************** -->
-<div>
-<p>LLVM has a number of "magic" global variables that contain data that affect
-code generation or other IR semantics.  These are documented here.  All globals
-of this sort should have a section specified as "<tt>llvm.metadata</tt>".  This
-section and all globals that start with "<tt>llvm.</tt>" are reserved for use
-by LLVM.</p>
-
-<!-- ======================================================================= -->
-<h3>
-<a name="intg_used">The '<tt>llvm.used</tt>' Global Variable</a>
-</h3>
-
-<div>
-
-<p>The <tt>@llvm.used</tt> global is an array with i8* element type which has <a
-href="#linkage_appending">appending linkage</a>.  This array contains a list of
-pointers to global variables and functions which may optionally have a pointer
-cast formed of bitcast or getelementptr.  For example, a legal use of it is:</p>
-
-<div class="doc_code">
-<pre>
-@X = global i8 4
-@Y = global i32 123
-
-@llvm.used = appending global [2 x i8*] [
-   i8* @X,
-   i8* bitcast (i32* @Y to i8*)
-], section "llvm.metadata"
-</pre>
-</div>
-
-<p>If a global variable appears in the <tt>@llvm.used</tt> list, then the
-   compiler, assembler, and linker are required to treat the symbol as if there
-   is a reference to the global that it cannot see.  For example, if a variable
-   has internal linkage and no references other than that from
-   the <tt>@llvm.used</tt> list, it cannot be deleted.  This is commonly used to
-   represent references from inline asms and other things the compiler cannot
-   "see", and corresponds to "<tt>attribute((used))</tt>" in GNU C.</p>
-
-<p>On some targets, the code generator must emit a directive to the assembler or
-   object file to prevent the assembler and linker from molesting the
-   symbol.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="intg_compiler_used">
-    The '<tt>llvm.compiler.used</tt>' Global Variable
-  </a>
-</h3>
-
-<div>
-
-<p>The <tt>@llvm.compiler.used</tt> directive is the same as the
-   <tt>@llvm.used</tt> directive, except that it only prevents the compiler from
-   touching the symbol.  On targets that support it, this allows an intelligent
-   linker to optimize references to the symbol without being impeded as it would
-   be by <tt>@llvm.used</tt>.</p>
-
-<p>This is a rare construct that should only be used in rare circumstances, and
-   should not be exposed to source languages.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-<a name="intg_global_ctors">The '<tt>llvm.global_ctors</tt>' Global Variable</a>
-</h3>
-
-<div>
-
-<div class="doc_code">
-<pre>
-%0 = type { i32, void ()* }
-@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
-</pre>
-</div>
-
-<p>The <tt>@llvm.global_ctors</tt> array contains a list of constructor
-   functions and associated priorities.  The functions referenced by this array
-   will be called in ascending order of priority (i.e. lowest first) when the
-   module is loaded.  The order of functions with the same priority is not
-   defined.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-<a name="intg_global_dtors">The '<tt>llvm.global_dtors</tt>' Global Variable</a>
-</h3>
-
-<div>
-
-<div class="doc_code">
-<pre>
-%0 = type { i32, void ()* }
-@llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
-</pre>
-</div>
-
-<p>The <tt>@llvm.global_dtors</tt> array contains a list of destructor functions
-   and associated priorities.  The functions referenced by this array will be
-   called in descending order of priority (i.e. highest first) when the module
-   is loaded.  The order of functions with the same priority is not defined.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="instref">Instruction Reference</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The LLVM instruction set consists of several different classifications of
-   instructions: <a href="#terminators">terminator
-   instructions</a>, <a href="#binaryops">binary instructions</a>,
-   <a href="#bitwiseops">bitwise binary instructions</a>,
-   <a href="#memoryops">memory instructions</a>, and
-   <a href="#otherops">other instructions</a>.</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="terminators">Terminator Instructions</a>
-</h3>
-
-<div>
-
-<p>As mentioned <a href="#functionstructure">previously</a>, every basic block
-   in a program ends with a "Terminator" instruction, which indicates which
-   block should be executed after the current block is finished. These
-   terminator instructions typically yield a '<tt>void</tt>' value: they produce
-   control flow, not values (the one exception being the
-   '<a href="#i_invoke"><tt>invoke</tt></a>' instruction).</p>
-
-<p>The terminator instructions are:
-   '<a href="#i_ret"><tt>ret</tt></a>',
-   '<a href="#i_br"><tt>br</tt></a>',
-   '<a href="#i_switch"><tt>switch</tt></a>',
-   '<a href="#i_indirectbr"><tt>indirectbr</tt></a>',
-   '<a href="#i_invoke"><tt>invoke</tt></a>',
-   '<a href="#i_resume"><tt>resume</tt></a>', and
-   '<a href="#i_unreachable"><tt>unreachable</tt></a>'.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_ret">'<tt>ret</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  ret &lt;type&gt; &lt;value&gt;       <i>; Return a value from a non-void function</i>
-  ret void                 <i>; Return from void function</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>ret</tt>' instruction is used to return control flow (and optionally
-   a value) from a function back to the caller.</p>
-
-<p>There are two forms of the '<tt>ret</tt>' instruction: one that returns a
-   value and then causes control flow, and one that just causes control flow to
-   occur.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>ret</tt>' instruction optionally accepts a single argument, the
-   return value. The type of the return value must be a
-   '<a href="#t_firstclass">first class</a>' type.</p>
-
-<p>A function is not <a href="#wellformed">well formed</a> if it it has a
-   non-void return type and contains a '<tt>ret</tt>' instruction with no return
-   value or a return value with a type that does not match its type, or if it
-   has a void return type and contains a '<tt>ret</tt>' instruction with a
-   return value.</p>
-
-<h5>Semantics:</h5>
-<p>When the '<tt>ret</tt>' instruction is executed, control flow returns back to
-   the calling function's context.  If the caller is a
-   "<a href="#i_call"><tt>call</tt></a>" instruction, execution continues at the
-   instruction after the call.  If the caller was an
-   "<a href="#i_invoke"><tt>invoke</tt></a>" instruction, execution continues at
-   the beginning of the "normal" destination block.  If the instruction returns
-   a value, that value shall set the call or invoke instruction's return
-   value.</p>
-
-<h5>Example:</h5>
-<pre>
-  ret i32 5                       <i>; Return an integer value of 5</i>
-  ret void                        <i>; Return from a void function</i>
-  ret { i32, i8 } { i32 4, i8 2 } <i>; Return a struct of values 4 and 2</i>
-</pre>
-
-</div>
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_br">'<tt>br</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  br i1 &lt;cond&gt;, label &lt;iftrue&gt;, label &lt;iffalse&gt;
-  br label &lt;dest&gt;          <i>; Unconditional branch</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>br</tt>' instruction is used to cause control flow to transfer to a
-   different basic block in the current function.  There are two forms of this
-   instruction, corresponding to a conditional branch and an unconditional
-   branch.</p>
-
-<h5>Arguments:</h5>
-<p>The conditional branch form of the '<tt>br</tt>' instruction takes a single
-   '<tt>i1</tt>' value and two '<tt>label</tt>' values.  The unconditional form
-   of the '<tt>br</tt>' instruction takes a single '<tt>label</tt>' value as a
-   target.</p>
-
-<h5>Semantics:</h5>
-<p>Upon execution of a conditional '<tt>br</tt>' instruction, the '<tt>i1</tt>'
-   argument is evaluated.  If the value is <tt>true</tt>, control flows to the
-   '<tt>iftrue</tt>' <tt>label</tt> argument.  If "cond" is <tt>false</tt>,
-   control flows to the '<tt>iffalse</tt>' <tt>label</tt> argument.</p>
-
-<h5>Example:</h5>
-<pre>
-Test:
-  %cond = <a href="#i_icmp">icmp</a> eq i32 %a, %b
-  br i1 %cond, label %IfEqual, label %IfUnequal
-IfEqual:
-  <a href="#i_ret">ret</a> i32 1
-IfUnequal:
-  <a href="#i_ret">ret</a> i32 0
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_switch">'<tt>switch</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  switch &lt;intty&gt; &lt;value&gt;, label &lt;defaultdest&gt; [ &lt;intty&gt; &lt;val&gt;, label &lt;dest&gt; ... ]
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>switch</tt>' instruction is used to transfer control flow to one of
-   several different places.  It is a generalization of the '<tt>br</tt>'
-   instruction, allowing a branch to occur to one of many possible
-   destinations.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>switch</tt>' instruction uses three parameters: an integer
-   comparison value '<tt>value</tt>', a default '<tt>label</tt>' destination,
-   and an array of pairs of comparison value constants and '<tt>label</tt>'s.
-   The table is not allowed to contain duplicate constant entries.</p>
-
-<h5>Semantics:</h5>
-<p>The <tt>switch</tt> instruction specifies a table of values and
-   destinations. When the '<tt>switch</tt>' instruction is executed, this table
-   is searched for the given value.  If the value is found, control flow is
-   transferred to the corresponding destination; otherwise, control flow is
-   transferred to the default destination.</p>
-
-<h5>Implementation:</h5>
-<p>Depending on properties of the target machine and the particular
-   <tt>switch</tt> instruction, this instruction may be code generated in
-   different ways.  For example, it could be generated as a series of chained
-   conditional branches or with a lookup table.</p>
-
-<h5>Example:</h5>
-<pre>
- <i>; Emulate a conditional br instruction</i>
- %Val = <a href="#i_zext">zext</a> i1 %value to i32
- switch i32 %Val, label %truedest [ i32 0, label %falsedest ]
-
- <i>; Emulate an unconditional br instruction</i>
- switch i32 0, label %dest [ ]
-
- <i>; Implement a jump table:</i>
- switch i32 %val, label %otherwise [ i32 0, label %onzero
-                                     i32 1, label %onone
-                                     i32 2, label %ontwo ]
-</pre>
-
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_indirectbr">'<tt>indirectbr</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  indirectbr &lt;somety&gt;* &lt;address&gt;, [ label &lt;dest1&gt;, label &lt;dest2&gt;, ... ]
-</pre>
-
-<h5>Overview:</h5>
-
-<p>The '<tt>indirectbr</tt>' instruction implements an indirect branch to a label
-   within the current function, whose address is specified by
-   "<tt>address</tt>".  Address must be derived from a <a
-   href="#blockaddress">blockaddress</a> constant.</p>
-
-<h5>Arguments:</h5>
-
-<p>The '<tt>address</tt>' argument is the address of the label to jump to.  The
-   rest of the arguments indicate the full set of possible destinations that the
-   address may point to.  Blocks are allowed to occur multiple times in the
-   destination list, though this isn't particularly useful.</p>
-
-<p>This destination list is required so that dataflow analysis has an accurate
-   understanding of the CFG.</p>
-
-<h5>Semantics:</h5>
-
-<p>Control transfers to the block specified in the address argument.  All
-   possible destination blocks must be listed in the label list, otherwise this
-   instruction has undefined behavior.  This implies that jumps to labels
-   defined in other functions have undefined behavior as well.</p>
-
-<h5>Implementation:</h5>
-
-<p>This is typically implemented with a jump through a register.</p>
-
-<h5>Example:</h5>
-<pre>
- indirectbr i8* %Addr, [ label %bb1, label %bb2, label %bb3 ]
-</pre>
-
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_invoke">'<tt>invoke</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = invoke [<a href="#callingconv">cconv</a>] [<a href="#paramattrs">ret attrs</a>] &lt;ptr to function ty&gt; &lt;function ptr val&gt;(&lt;function args&gt;) [<a href="#fnattrs">fn attrs</a>]
-                to label &lt;normal label&gt; unwind label &lt;exception label&gt;
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>invoke</tt>' instruction causes control to transfer to a specified
-   function, with the possibility of control flow transfer to either the
-   '<tt>normal</tt>' label or the '<tt>exception</tt>' label.  If the callee
-   function returns with the "<tt><a href="#i_ret">ret</a></tt>" instruction,
-   control flow will return to the "normal" label.  If the callee (or any
-   indirect callees) returns via the "<a href="#i_resume"><tt>resume</tt></a>"
-   instruction or other exception handling mechanism, control is interrupted and
-   continued at the dynamically nearest "exception" label.</p>
-
-<p>The '<tt>exception</tt>' label is a
-   <i><a href="ExceptionHandling.html#overview">landing pad</a></i> for the
-   exception. As such, '<tt>exception</tt>' label is required to have the
-   "<a href="#i_landingpad"><tt>landingpad</tt></a>" instruction, which contains
-   the information about the behavior of the program after unwinding
-   happens, as its first non-PHI instruction. The restrictions on the
-   "<tt>landingpad</tt>" instruction's tightly couples it to the
-   "<tt>invoke</tt>" instruction, so that the important information contained
-   within the "<tt>landingpad</tt>" instruction can't be lost through normal
-   code motion.</p>
-
-<h5>Arguments:</h5>
-<p>This instruction requires several arguments:</p>
-
-<ol>
-  <li>The optional "cconv" marker indicates which <a href="#callingconv">calling
-      convention</a> the call should use.  If none is specified, the call
-      defaults to using C calling conventions.</li>
-
-  <li>The optional <a href="#paramattrs">Parameter Attributes</a> list for
-      return values. Only '<tt>zeroext</tt>', '<tt>signext</tt>', and
-      '<tt>inreg</tt>' attributes are valid here.</li>
-
-  <li>'<tt>ptr to function ty</tt>': shall be the signature of the pointer to
-      function value being invoked.  In most cases, this is a direct function
-      invocation, but indirect <tt>invoke</tt>s are just as possible, branching
-      off an arbitrary pointer to function value.</li>
-
-  <li>'<tt>function ptr val</tt>': An LLVM value containing a pointer to a
-      function to be invoked. </li>
-
-  <li>'<tt>function args</tt>': argument list whose types match the function
-      signature argument types and parameter attributes. All arguments must be
-      of <a href="#t_firstclass">first class</a> type. If the function
-      signature indicates the function accepts a variable number of arguments,
-      the extra arguments can be specified.</li>
-
-  <li>'<tt>normal label</tt>': the label reached when the called function
-      executes a '<tt><a href="#i_ret">ret</a></tt>' instruction. </li>
-
-  <li>'<tt>exception label</tt>': the label reached when a callee returns via
-      the <a href="#i_resume"><tt>resume</tt></a> instruction or other exception
-      handling mechanism.</li>
-
-  <li>The optional <a href="#fnattrs">function attributes</a> list. Only
-      '<tt>noreturn</tt>', '<tt>nounwind</tt>', '<tt>readonly</tt>' and
-      '<tt>readnone</tt>' attributes are valid here.</li>
-</ol>
-
-<h5>Semantics:</h5>
-<p>This instruction is designed to operate as a standard
-   '<tt><a href="#i_call">call</a></tt>' instruction in most regards.  The
-   primary difference is that it establishes an association with a label, which
-   is used by the runtime library to unwind the stack.</p>
-
-<p>This instruction is used in languages with destructors to ensure that proper
-   cleanup is performed in the case of either a <tt>longjmp</tt> or a thrown
-   exception.  Additionally, this is important for implementation of
-   '<tt>catch</tt>' clauses in high-level languages that support them.</p>
-
-<p>For the purposes of the SSA form, the definition of the value returned by the
-   '<tt>invoke</tt>' instruction is deemed to occur on the edge from the current
-   block to the "normal" label. If the callee unwinds then no return value is
-   available.</p>
-
-<h5>Example:</h5>
-<pre>
-  %retval = invoke i32 @Test(i32 15) to label %Continue
-              unwind label %TestCleanup              <i>; {i32}:retval set</i>
-  %retval = invoke <a href="#callingconv">coldcc</a> i32 %Testfnptr(i32 15) to label %Continue
-              unwind label %TestCleanup              <i>; {i32}:retval set</i>
-</pre>
-
-</div>
-
- <!-- _______________________________________________________________________ -->
-
-<h4>
-  <a name="i_resume">'<tt>resume</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  resume &lt;type&gt; &lt;value&gt;
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>resume</tt>' instruction is a terminator instruction that has no
-   successors.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>resume</tt>' instruction requires one argument, which must have the
-   same type as the result of any '<tt>landingpad</tt>' instruction in the same
-   function.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>resume</tt>' instruction resumes propagation of an existing
-   (in-flight) exception whose unwinding was interrupted with
-   a <a href="#i_landingpad"><tt>landingpad</tt></a> instruction.</p>
-
-<h5>Example:</h5>
-<pre>
-  resume { i8*, i32 } %exn
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-
-<h4>
-  <a name="i_unreachable">'<tt>unreachable</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  unreachable
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>unreachable</tt>' instruction has no defined semantics.  This
-   instruction is used to inform the optimizer that a particular portion of the
-   code is not reachable.  This can be used to indicate that the code after a
-   no-return function cannot be reached, and other facts.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>unreachable</tt>' instruction has no defined semantics.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="binaryops">Binary Operations</a>
-</h3>
-
-<div>
-
-<p>Binary operators are used to do most of the computation in a program.  They
-   require two operands of the same type, execute an operation on them, and
-   produce a single value.  The operands might represent multiple data, as is
-   the case with the <a href="#t_vector">vector</a> data type.  The result value
-   has the same type as its operands.</p>
-
-<p>There are several different binary operators:</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_add">'<tt>add</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = add &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;          <i>; yields {ty}:result</i>
-  &lt;result&gt; = add nuw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;      <i>; yields {ty}:result</i>
-  &lt;result&gt; = add nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;      <i>; yields {ty}:result</i>
-  &lt;result&gt; = add nuw nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;  <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>add</tt>' instruction returns the sum of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>add</tt>' instruction must
-   be <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
-   integer values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the integer sum of the two operands.</p>
-
-<p>If the sum has unsigned overflow, the result returned is the mathematical
-   result modulo 2<sup>n</sup>, where n is the bit width of the result.</p>
-
-<p>Because LLVM integers use a two's complement representation, this instruction
-   is appropriate for both signed and unsigned integers.</p>
-
-<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
-   and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
-   <tt>nsw</tt> keywords are present, the result value of the <tt>add</tt>
-   is a <a href="#poisonvalues">poison value</a> if unsigned and/or signed overflow,
-   respectively, occurs.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = add i32 4, %var          <i>; yields {i32}:result = 4 + %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_fadd">'<tt>fadd</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = fadd &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fadd</tt>' instruction returns the sum of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>fadd</tt>' instruction must be
-   <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
-   floating point values. Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the floating point sum of the two operands.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = fadd float 4.0, %var          <i>; yields {float}:result = 4.0 + %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_sub">'<tt>sub</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = sub &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;          <i>; yields {ty}:result</i>
-  &lt;result&gt; = sub nuw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;      <i>; yields {ty}:result</i>
-  &lt;result&gt; = sub nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;      <i>; yields {ty}:result</i>
-  &lt;result&gt; = sub nuw nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;  <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>sub</tt>' instruction returns the difference of its two
-   operands.</p>
-
-<p>Note that the '<tt>sub</tt>' instruction is used to represent the
-   '<tt>neg</tt>' instruction present in most other intermediate
-   representations.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>sub</tt>' instruction must
-   be <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
-   integer values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the integer difference of the two operands.</p>
-
-<p>If the difference has unsigned overflow, the result returned is the
-   mathematical result modulo 2<sup>n</sup>, where n is the bit width of the
-   result.</p>
-
-<p>Because LLVM integers use a two's complement representation, this instruction
-   is appropriate for both signed and unsigned integers.</p>
-
-<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
-   and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
-   <tt>nsw</tt> keywords are present, the result value of the <tt>sub</tt>
-   is a <a href="#poisonvalues">poison value</a> if unsigned and/or signed overflow,
-   respectively, occurs.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = sub i32 4, %var          <i>; yields {i32}:result = 4 - %var</i>
-  &lt;result&gt; = sub i32 0, %val          <i>; yields {i32}:result = -%var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_fsub">'<tt>fsub</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = fsub &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fsub</tt>' instruction returns the difference of its two
-   operands.</p>
-
-<p>Note that the '<tt>fsub</tt>' instruction is used to represent the
-   '<tt>fneg</tt>' instruction present in most other intermediate
-   representations.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>fsub</tt>' instruction must be
-   <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
-   floating point values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the floating point difference of the two operands.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = fsub float 4.0, %var           <i>; yields {float}:result = 4.0 - %var</i>
-  &lt;result&gt; = fsub float -0.0, %val          <i>; yields {float}:result = -%var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_mul">'<tt>mul</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = mul &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;          <i>; yields {ty}:result</i>
-  &lt;result&gt; = mul nuw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;      <i>; yields {ty}:result</i>
-  &lt;result&gt; = mul nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;      <i>; yields {ty}:result</i>
-  &lt;result&gt; = mul nuw nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;  <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>mul</tt>' instruction returns the product of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>mul</tt>' instruction must
-   be <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
-   integer values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the integer product of the two operands.</p>
-
-<p>If the result of the multiplication has unsigned overflow, the result
-   returned is the mathematical result modulo 2<sup>n</sup>, where n is the bit
-   width of the result.</p>
-
-<p>Because LLVM integers use a two's complement representation, and the result
-   is the same width as the operands, this instruction returns the correct
-   result for both signed and unsigned integers.  If a full product
-   (e.g. <tt>i32</tt>x<tt>i32</tt>-><tt>i64</tt>) is needed, the operands should
-   be sign-extended or zero-extended as appropriate to the width of the full
-   product.</p>
-
-<p><tt>nuw</tt> and <tt>nsw</tt> stand for &quot;No Unsigned Wrap&quot;
-   and &quot;No Signed Wrap&quot;, respectively. If the <tt>nuw</tt> and/or
-   <tt>nsw</tt> keywords are present, the result value of the <tt>mul</tt>
-   is a <a href="#poisonvalues">poison value</a> if unsigned and/or signed overflow,
-   respectively, occurs.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = mul i32 4, %var          <i>; yields {i32}:result = 4 * %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_fmul">'<tt>fmul</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = fmul &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fmul</tt>' instruction returns the product of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>fmul</tt>' instruction must be
-   <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
-   floating point values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the floating point product of the two operands.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = fmul float 4.0, %var          <i>; yields {float}:result = 4.0 * %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_udiv">'<tt>udiv</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = udiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;         <i>; yields {ty}:result</i>
-  &lt;result&gt; = udiv exact &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>udiv</tt>' instruction returns the quotient of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>udiv</tt>' instruction must be
-   <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-   values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the unsigned integer quotient of the two operands.</p>
-
-<p>Note that unsigned integer division and signed integer division are distinct
-   operations; for signed integer division, use '<tt>sdiv</tt>'.</p>
-
-<p>Division by zero leads to undefined behavior.</p>
-
-<p>If the <tt>exact</tt> keyword is present, the result value of the
-   <tt>udiv</tt> is a <a href="#poisonvalues">poison value</a> if %op1 is not a
-  multiple of %op2 (as such, "((a udiv exact b) mul b) == a").</p>
-
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = udiv i32 4, %var          <i>; yields {i32}:result = 4 / %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_sdiv">'<tt>sdiv</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = sdiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;         <i>; yields {ty}:result</i>
-  &lt;result&gt; = sdiv exact &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>sdiv</tt>' instruction returns the quotient of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>sdiv</tt>' instruction must be
-   <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-   values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the signed integer quotient of the two operands rounded
-   towards zero.</p>
-
-<p>Note that signed integer division and unsigned integer division are distinct
-   operations; for unsigned integer division, use '<tt>udiv</tt>'.</p>
-
-<p>Division by zero leads to undefined behavior. Overflow also leads to
-   undefined behavior; this is a rare case, but can occur, for example, by doing
-   a 32-bit division of -2147483648 by -1.</p>
-
-<p>If the <tt>exact</tt> keyword is present, the result value of the
-   <tt>sdiv</tt> is a <a href="#poisonvalues">poison value</a> if the result would
-   be rounded.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = sdiv i32 4, %var          <i>; yields {i32}:result = 4 / %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_fdiv">'<tt>fdiv</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = fdiv &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fdiv</tt>' instruction returns the quotient of its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>fdiv</tt>' instruction must be
-   <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
-   floating point values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is the floating point quotient of the two operands.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = fdiv float 4.0, %var          <i>; yields {float}:result = 4.0 / %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_urem">'<tt>urem</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = urem &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>urem</tt>' instruction returns the remainder from the unsigned
-   division of its two arguments.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>urem</tt>' instruction must be
-   <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-   values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>This instruction returns the unsigned integer <i>remainder</i> of a division.
-   This instruction always performs an unsigned division to get the
-   remainder.</p>
-
-<p>Note that unsigned integer remainder and signed integer remainder are
-   distinct operations; for signed integer remainder, use '<tt>srem</tt>'.</p>
-
-<p>Taking the remainder of a division by zero leads to undefined behavior.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = urem i32 4, %var          <i>; yields {i32}:result = 4 % %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_srem">'<tt>srem</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = srem &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>srem</tt>' instruction returns the remainder from the signed
-   division of its two operands. This instruction can also take
-   <a href="#t_vector">vector</a> versions of the values in which case the
-   elements must be integers.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>srem</tt>' instruction must be
-   <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-   values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>This instruction returns the <i>remainder</i> of a division (where the result
-   is either zero or has the same sign as the dividend, <tt>op1</tt>), not the
-   <i>modulo</i> operator (where the result is either zero or has the same sign
-   as the divisor, <tt>op2</tt>) of a value.
-   For more information about the difference,
-   see <a href="http://mathforum.org/dr.math/problems/anne.4.28.99.html">The
-   Math Forum</a>. For a table of how this is implemented in various languages,
-   please see <a href="http://en.wikipedia.org/wiki/Modulo_operation">
-   Wikipedia: modulo operation</a>.</p>
-
-<p>Note that signed integer remainder and unsigned integer remainder are
-   distinct operations; for unsigned integer remainder, use '<tt>urem</tt>'.</p>
-
-<p>Taking the remainder of a division by zero leads to undefined behavior.
-   Overflow also leads to undefined behavior; this is a rare case, but can
-   occur, for example, by taking the remainder of a 32-bit division of
-   -2147483648 by -1.  (The remainder doesn't actually overflow, but this rule
-   lets srem be implemented using instructions that return both the result of
-   the division and the remainder.)</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = srem i32 4, %var          <i>; yields {i32}:result = 4 % %var</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_frem">'<tt>frem</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = frem &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>frem</tt>' instruction returns the remainder from the division of
-   its two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>frem</tt>' instruction must be
-   <a href="#t_floating">floating point</a> or <a href="#t_vector">vector</a> of
-   floating point values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>This instruction returns the <i>remainder</i> of a division.  The remainder
-   has the same sign as the dividend.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = frem float 4.0, %var          <i>; yields {float}:result = 4.0 % %var</i>
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="bitwiseops">Bitwise Binary Operations</a>
-</h3>
-
-<div>
-
-<p>Bitwise binary operators are used to do various forms of bit-twiddling in a
-   program.  They are generally very efficient instructions and can commonly be
-   strength reduced from other instructions.  They require two operands of the
-   same type, execute an operation on them, and produce a single value.  The
-   resulting value is the same type as its operands.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_shl">'<tt>shl</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = shl &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;           <i>; yields {ty}:result</i>
-  &lt;result&gt; = shl nuw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;       <i>; yields {ty}:result</i>
-  &lt;result&gt; = shl nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;       <i>; yields {ty}:result</i>
-  &lt;result&gt; = shl nuw nsw &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>shl</tt>' instruction returns the first operand shifted to the left
-   a specified number of bits.</p>
-
-<h5>Arguments:</h5>
-<p>Both arguments to the '<tt>shl</tt>' instruction must be the
-    same <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
-    integer type.  '<tt>op2</tt>' is treated as an unsigned value.</p>
-
-<h5>Semantics:</h5>
-<p>The value produced is <tt>op1</tt> * 2<sup><tt>op2</tt></sup> mod
-   2<sup>n</sup>, where <tt>n</tt> is the width of the result.  If <tt>op2</tt>
-   is (statically or dynamically) negative or equal to or larger than the number
-   of bits in <tt>op1</tt>, the result is undefined.  If the arguments are
-   vectors, each vector element of <tt>op1</tt> is shifted by the corresponding
-   shift amount in <tt>op2</tt>.</p>
-
-<p>If the <tt>nuw</tt> keyword is present, then the shift produces a
-   <a href="#poisonvalues">poison value</a> if it shifts out any non-zero bits.  If
-   the <tt>nsw</tt> keyword is present, then the shift produces a
-   <a href="#poisonvalues">poison value</a> if it shifts out any bits that disagree
-   with the resultant sign bit.  As such, NUW/NSW have the same semantics as
-   they would if the shift were expressed as a mul instruction with the same
-   nsw/nuw bits in (mul %op1, (shl 1, %op2)).</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = shl i32 4, %var   <i>; yields {i32}: 4 &lt;&lt; %var</i>
-  &lt;result&gt; = shl i32 4, 2      <i>; yields {i32}: 16</i>
-  &lt;result&gt; = shl i32 1, 10     <i>; yields {i32}: 1024</i>
-  &lt;result&gt; = shl i32 1, 32     <i>; undefined</i>
-  &lt;result&gt; = shl &lt;2 x i32&gt; &lt; i32 1, i32 1&gt;, &lt; i32 1, i32 2&gt;   <i>; yields: result=&lt;2 x i32&gt; &lt; i32 2, i32 4&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_lshr">'<tt>lshr</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = lshr &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;         <i>; yields {ty}:result</i>
-  &lt;result&gt; = lshr exact &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>lshr</tt>' instruction (logical shift right) returns the first
-   operand shifted to the right a specified number of bits with zero fill.</p>
-
-<h5>Arguments:</h5>
-<p>Both arguments to the '<tt>lshr</tt>' instruction must be the same
-   <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-   type. '<tt>op2</tt>' is treated as an unsigned value.</p>
-
-<h5>Semantics:</h5>
-<p>This instruction always performs a logical shift right operation. The most
-   significant bits of the result will be filled with zero bits after the shift.
-   If <tt>op2</tt> is (statically or dynamically) equal to or larger than the
-   number of bits in <tt>op1</tt>, the result is undefined. If the arguments are
-   vectors, each vector element of <tt>op1</tt> is shifted by the corresponding
-   shift amount in <tt>op2</tt>.</p>
-
-<p>If the <tt>exact</tt> keyword is present, the result value of the
-   <tt>lshr</tt> is a <a href="#poisonvalues">poison value</a> if any of the bits
-   shifted out are non-zero.</p>
-
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = lshr i32 4, 1   <i>; yields {i32}:result = 2</i>
-  &lt;result&gt; = lshr i32 4, 2   <i>; yields {i32}:result = 1</i>
-  &lt;result&gt; = lshr i8  4, 3   <i>; yields {i8}:result = 0</i>
-  &lt;result&gt; = lshr i8 -2, 1   <i>; yields {i8}:result = 0x7FFFFFFF </i>
-  &lt;result&gt; = lshr i32 1, 32  <i>; undefined</i>
-  &lt;result&gt; = lshr &lt;2 x i32&gt; &lt; i32 -2, i32 4&gt;, &lt; i32 1, i32 2&gt;   <i>; yields: result=&lt;2 x i32&gt; &lt; i32 0x7FFFFFFF, i32 1&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_ashr">'<tt>ashr</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = ashr &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;         <i>; yields {ty}:result</i>
-  &lt;result&gt; = ashr exact &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>ashr</tt>' instruction (arithmetic shift right) returns the first
-   operand shifted to the right a specified number of bits with sign
-   extension.</p>
-
-<h5>Arguments:</h5>
-<p>Both arguments to the '<tt>ashr</tt>' instruction must be the same
-   <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-   type.  '<tt>op2</tt>' is treated as an unsigned value.</p>
-
-<h5>Semantics:</h5>
-<p>This instruction always performs an arithmetic shift right operation, The
-   most significant bits of the result will be filled with the sign bit
-   of <tt>op1</tt>.  If <tt>op2</tt> is (statically or dynamically) equal to or
-   larger than the number of bits in <tt>op1</tt>, the result is undefined. If
-   the arguments are vectors, each vector element of <tt>op1</tt> is shifted by
-   the corresponding shift amount in <tt>op2</tt>.</p>
-
-<p>If the <tt>exact</tt> keyword is present, the result value of the
-   <tt>ashr</tt> is a <a href="#poisonvalues">poison value</a> if any of the bits
-   shifted out are non-zero.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = ashr i32 4, 1   <i>; yields {i32}:result = 2</i>
-  &lt;result&gt; = ashr i32 4, 2   <i>; yields {i32}:result = 1</i>
-  &lt;result&gt; = ashr i8  4, 3   <i>; yields {i8}:result = 0</i>
-  &lt;result&gt; = ashr i8 -2, 1   <i>; yields {i8}:result = -1</i>
-  &lt;result&gt; = ashr i32 1, 32  <i>; undefined</i>
-  &lt;result&gt; = ashr &lt;2 x i32&gt; &lt; i32 -2, i32 4&gt;, &lt; i32 1, i32 3&gt;   <i>; yields: result=&lt;2 x i32&gt; &lt; i32 -1, i32 0&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_and">'<tt>and</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = and &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>and</tt>' instruction returns the bitwise logical and of its two
-   operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>and</tt>' instruction must be
-   <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-   values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The truth table used for the '<tt>and</tt>' instruction is:</p>
-
-<table border="1" cellspacing="0" cellpadding="4">
-  <tbody>
-    <tr>
-      <th>In0</th>
-      <th>In1</th>
-      <th>Out</th>
-    </tr>
-    <tr>
-      <td>0</td>
-      <td>0</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <td>0</td>
-      <td>1</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <td>1</td>
-      <td>0</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <td>1</td>
-      <td>1</td>
-      <td>1</td>
-    </tr>
-  </tbody>
-</table>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = and i32 4, %var         <i>; yields {i32}:result = 4 &amp; %var</i>
-  &lt;result&gt; = and i32 15, 40          <i>; yields {i32}:result = 8</i>
-  &lt;result&gt; = and i32 4, 8            <i>; yields {i32}:result = 0</i>
-</pre>
-</div>
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_or">'<tt>or</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = or &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>or</tt>' instruction returns the bitwise logical inclusive or of its
-   two operands.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>or</tt>' instruction must be
-   <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-   values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The truth table used for the '<tt>or</tt>' instruction is:</p>
-
-<table border="1" cellspacing="0" cellpadding="4">
-  <tbody>
-    <tr>
-      <th>In0</th>
-      <th>In1</th>
-      <th>Out</th>
-    </tr>
-    <tr>
-      <td>0</td>
-      <td>0</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <td>0</td>
-      <td>1</td>
-      <td>1</td>
-    </tr>
-    <tr>
-      <td>1</td>
-      <td>0</td>
-      <td>1</td>
-    </tr>
-    <tr>
-      <td>1</td>
-      <td>1</td>
-      <td>1</td>
-    </tr>
-  </tbody>
-</table>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = or i32 4, %var         <i>; yields {i32}:result = 4 | %var</i>
-  &lt;result&gt; = or i32 15, 40          <i>; yields {i32}:result = 47</i>
-  &lt;result&gt; = or i32 4, 8            <i>; yields {i32}:result = 12</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_xor">'<tt>xor</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = xor &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {ty}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>xor</tt>' instruction returns the bitwise logical exclusive or of
-   its two operands.  The <tt>xor</tt> is used to implement the "one's
-   complement" operation, which is the "~" operator in C.</p>
-
-<h5>Arguments:</h5>
-<p>The two arguments to the '<tt>xor</tt>' instruction must be
-   <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
-   values.  Both arguments must have identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The truth table used for the '<tt>xor</tt>' instruction is:</p>
-
-<table border="1" cellspacing="0" cellpadding="4">
-  <tbody>
-    <tr>
-      <th>In0</th>
-      <th>In1</th>
-      <th>Out</th>
-    </tr>
-    <tr>
-      <td>0</td>
-      <td>0</td>
-      <td>0</td>
-    </tr>
-    <tr>
-      <td>0</td>
-      <td>1</td>
-      <td>1</td>
-    </tr>
-    <tr>
-      <td>1</td>
-      <td>0</td>
-      <td>1</td>
-    </tr>
-    <tr>
-      <td>1</td>
-      <td>1</td>
-      <td>0</td>
-    </tr>
-  </tbody>
-</table>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = xor i32 4, %var         <i>; yields {i32}:result = 4 ^ %var</i>
-  &lt;result&gt; = xor i32 15, 40          <i>; yields {i32}:result = 39</i>
-  &lt;result&gt; = xor i32 4, 8            <i>; yields {i32}:result = 12</i>
-  &lt;result&gt; = xor i32 %V, -1          <i>; yields {i32}:result = ~%V</i>
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="vectorops">Vector Operations</a>
-</h3>
-
-<div>
-
-<p>LLVM supports several instructions to represent vector operations in a
-   target-independent manner.  These instructions cover the element-access and
-   vector-specific operations needed to process vectors effectively.  While LLVM
-   does directly support these vector operations, many sophisticated algorithms
-   will want to use target-specific intrinsics to take full advantage of a
-   specific target.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_extractelement">'<tt>extractelement</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = extractelement &lt;n x &lt;ty&gt;&gt; &lt;val&gt;, i32 &lt;idx&gt;    <i>; yields &lt;ty&gt;</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>extractelement</tt>' instruction extracts a single scalar element
-   from a vector at a specified index.</p>
-
-
-<h5>Arguments:</h5>
-<p>The first operand of an '<tt>extractelement</tt>' instruction is a value
-   of <a href="#t_vector">vector</a> type.  The second operand is an index
-   indicating the position from which to extract the element.  The index may be
-   a variable.</p>
-
-<h5>Semantics:</h5>
-<p>The result is a scalar of the same type as the element type of
-   <tt>val</tt>.  Its value is the value at position <tt>idx</tt> of
-   <tt>val</tt>.  If <tt>idx</tt> exceeds the length of <tt>val</tt>, the
-   results are undefined.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = extractelement &lt;4 x i32&gt; %vec, i32 0    <i>; yields i32</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_insertelement">'<tt>insertelement</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = insertelement &lt;n x &lt;ty&gt;&gt; &lt;val&gt;, &lt;ty&gt; &lt;elt&gt;, i32 &lt;idx&gt;    <i>; yields &lt;n x &lt;ty&gt;&gt;</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>insertelement</tt>' instruction inserts a scalar element into a
-   vector at a specified index.</p>
-
-<h5>Arguments:</h5>
-<p>The first operand of an '<tt>insertelement</tt>' instruction is a value
-   of <a href="#t_vector">vector</a> type.  The second operand is a scalar value
-   whose type must equal the element type of the first operand.  The third
-   operand is an index indicating the position at which to insert the value.
-   The index may be a variable.</p>
-
-<h5>Semantics:</h5>
-<p>The result is a vector of the same type as <tt>val</tt>.  Its element values
-   are those of <tt>val</tt> except at position <tt>idx</tt>, where it gets the
-   value <tt>elt</tt>.  If <tt>idx</tt> exceeds the length of <tt>val</tt>, the
-   results are undefined.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = insertelement &lt;4 x i32&gt; %vec, i32 1, i32 0    <i>; yields &lt;4 x i32&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_shufflevector">'<tt>shufflevector</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = shufflevector &lt;n x &lt;ty&gt;&gt; &lt;v1&gt;, &lt;n x &lt;ty&gt;&gt; &lt;v2&gt;, &lt;m x i32&gt; &lt;mask&gt;    <i>; yields &lt;m x &lt;ty&gt;&gt;</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>shufflevector</tt>' instruction constructs a permutation of elements
-   from two input vectors, returning a vector with the same element type as the
-   input and length that is the same as the shuffle mask.</p>
-
-<h5>Arguments:</h5>
-<p>The first two operands of a '<tt>shufflevector</tt>' instruction are vectors
-   with the same type.  The third argument is a shuffle mask whose
-   element type is always 'i32'.  The result of the instruction is a vector
-   whose length is the same as the shuffle mask and whose element type is the
-   same as the element type of the first two operands.</p>
-
-<p>The shuffle mask operand is required to be a constant vector with either
-   constant integer or undef values.</p>
-
-<h5>Semantics:</h5>
-<p>The elements of the two input vectors are numbered from left to right across
-   both of the vectors.  The shuffle mask operand specifies, for each element of
-   the result vector, which element of the two input vectors the result element
-   gets.  The element selector may be undef (meaning "don't care") and the
-   second operand may be undef if performing a shuffle from only one vector.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
-                          &lt;4 x i32&gt; &lt;i32 0, i32 4, i32 1, i32 5&gt;  <i>; yields &lt;4 x i32&gt;</i>
-  &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; undef,
-                          &lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt;  <i>; yields &lt;4 x i32&gt;</i> - Identity shuffle.
-  &lt;result&gt; = shufflevector &lt;8 x i32&gt; %v1, &lt;8 x i32&gt; undef,
-                          &lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt;  <i>; yields &lt;4 x i32&gt;</i>
-  &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
-                          &lt;8 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 &gt;  <i>; yields &lt;8 x i32&gt;</i>
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="aggregateops">Aggregate Operations</a>
-</h3>
-
-<div>
-
-<p>LLVM supports several instructions for working with
-  <a href="#t_aggregate">aggregate</a> values.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_extractvalue">'<tt>extractvalue</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = extractvalue &lt;aggregate type&gt; &lt;val&gt;, &lt;idx&gt;{, &lt;idx&gt;}*
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>extractvalue</tt>' instruction extracts the value of a member field
-   from an <a href="#t_aggregate">aggregate</a> value.</p>
-
-<h5>Arguments:</h5>
-<p>The first operand of an '<tt>extractvalue</tt>' instruction is a value
-   of <a href="#t_struct">struct</a> or
-   <a href="#t_array">array</a> type.  The operands are constant indices to
-   specify which value to extract in a similar manner as indices in a
-   '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.</p>
-   <p>The major differences to <tt>getelementptr</tt> indexing are:</p>
-     <ul>
-       <li>Since the value being indexed is not a pointer, the first index is
-           omitted and assumed to be zero.</li>
-       <li>At least one index must be specified.</li>
-       <li>Not only struct indices but also array indices must be in
-           bounds.</li>
-     </ul>
-
-<h5>Semantics:</h5>
-<p>The result is the value at the position in the aggregate specified by the
-   index operands.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = extractvalue {i32, float} %agg, 0    <i>; yields i32</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_insertvalue">'<tt>insertvalue</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = insertvalue &lt;aggregate type&gt; &lt;val&gt;, &lt;ty&gt; &lt;elt&gt;, &lt;idx&gt;{, &lt;idx&gt;}*    <i>; yields &lt;aggregate type&gt;</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>insertvalue</tt>' instruction inserts a value into a member field
-   in an <a href="#t_aggregate">aggregate</a> value.</p>
-
-<h5>Arguments:</h5>
-<p>The first operand of an '<tt>insertvalue</tt>' instruction is a value
-   of <a href="#t_struct">struct</a> or
-   <a href="#t_array">array</a> type.  The second operand is a first-class
-   value to insert.  The following operands are constant indices indicating
-   the position at which to insert the value in a similar manner as indices in a
-   '<tt><a href="#i_extractvalue">extractvalue</a></tt>' instruction.  The
-   value to insert must have the same type as the value identified by the
-   indices.</p>
-
-<h5>Semantics:</h5>
-<p>The result is an aggregate of the same type as <tt>val</tt>.  Its value is
-   that of <tt>val</tt> except that the value at the position specified by the
-   indices is that of <tt>elt</tt>.</p>
-
-<h5>Example:</h5>
-<pre>
-  %agg1 = insertvalue {i32, float} undef, i32 1, 0              <i>; yields {i32 1, float undef}</i>
-  %agg2 = insertvalue {i32, float} %agg1, float %val, 1         <i>; yields {i32 1, float %val}</i>
-  %agg3 = insertvalue {i32, {float}} %agg1, float %val, 1, 0    <i>; yields {i32 1, float %val}</i>
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="memoryops">Memory Access and Addressing Operations</a>
-</h3>
-
-<div>
-
-<p>A key design point of an SSA-based representation is how it represents
-   memory.  In LLVM, no memory locations are in SSA form, which makes things
-   very simple.  This section describes how to read, write, and allocate
-   memory in LLVM.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_alloca">'<tt>alloca</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = alloca &lt;type&gt;[, &lt;ty&gt; &lt;NumElements&gt;][, align &lt;alignment&gt;]     <i>; yields {type*}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>alloca</tt>' instruction allocates memory on the stack frame of the
-   currently executing function, to be automatically released when this function
-   returns to its caller. The object is always allocated in the generic address
-   space (address space zero).</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>alloca</tt>' instruction
-   allocates <tt>sizeof(&lt;type&gt;)*NumElements</tt> bytes of memory on the
-   runtime stack, returning a pointer of the appropriate type to the program.
-   If "NumElements" is specified, it is the number of elements allocated,
-   otherwise "NumElements" is defaulted to be one.  If a constant alignment is
-   specified, the value result of the allocation is guaranteed to be aligned to
-   at least that boundary.  If not specified, or if zero, the target can choose
-   to align the allocation on any convenient boundary compatible with the
-   type.</p>
-
-<p>'<tt>type</tt>' may be any sized type.</p>
-
-<h5>Semantics:</h5>
-<p>Memory is allocated; a pointer is returned.  The operation is undefined if
-   there is insufficient stack space for the allocation.  '<tt>alloca</tt>'d
-   memory is automatically released when the function returns.  The
-   '<tt>alloca</tt>' instruction is commonly used to represent automatic
-   variables that must have an address available.  When the function returns
-   (either with the <tt><a href="#i_ret">ret</a></tt>
-   or <tt><a href="#i_resume">resume</a></tt> instructions), the memory is
-   reclaimed.  Allocating zero bytes is legal, but the result is undefined.
-   The order in which memory is allocated (ie., which way the stack grows) is
-   not specified.</p>
-
-<p>
-
-<h5>Example:</h5>
-<pre>
-  %ptr = alloca i32                             <i>; yields {i32*}:ptr</i>
-  %ptr = alloca i32, i32 4                      <i>; yields {i32*}:ptr</i>
-  %ptr = alloca i32, i32 4, align 1024          <i>; yields {i32*}:ptr</i>
-  %ptr = alloca i32, align 1024                 <i>; yields {i32*}:ptr</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_load">'<tt>load</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = load [volatile] &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;][, !invariant.load !&lt;index&gt;]
-  &lt;result&gt; = load atomic [volatile] &lt;ty&gt;* &lt;pointer&gt; [singlethread] &lt;ordering&gt;, align &lt;alignment&gt;
-  !&lt;index&gt; = !{ i32 1 }
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>load</tt>' instruction is used to read from memory.</p>
-
-<h5>Arguments:</h5>
-<p>The argument to the '<tt>load</tt>' instruction specifies the memory address
-   from which to load.  The pointer must point to
-   a <a href="#t_firstclass">first class</a> type.  If the <tt>load</tt> is
-   marked as <tt>volatile</tt>, then the optimizer is not allowed to modify the
-   number or order of execution of this <tt>load</tt> with other <a
-   href="#volatile">volatile operations</a>.</p>
-
-<p>If the <code>load</code> is marked as <code>atomic</code>, it takes an extra
-   <a href="#ordering">ordering</a> and optional <code>singlethread</code>
-   argument.  The <code>release</code> and <code>acq_rel</code> orderings are
-   not valid on <code>load</code> instructions.  Atomic loads produce <a
-   href="#memorymodel">defined</a> results when they may see multiple atomic
-   stores.  The type of the pointee must be an integer type whose bit width
-   is a power of two greater than or equal to eight and less than or equal
-   to a target-specific size limit. <code>align</code> must be explicitly
-   specified on atomic loads, and the load has undefined behavior if the
-   alignment is not set to a value which is at least the size in bytes of
-   the pointee. <code>!nontemporal</code> does not have any defined semantics
-   for atomic loads.</p>
-
-<p>The optional constant <tt>align</tt> argument specifies the alignment of the
-   operation (that is, the alignment of the memory address). A value of 0 or an
-   omitted <tt>align</tt> argument means that the operation has the abi
-   alignment for the target. It is the responsibility of the code emitter to
-   ensure that the alignment information is correct. Overestimating the
-   alignment results in undefined behavior. Underestimating the alignment may
-   produce less efficient code. An alignment of 1 is always safe.</p>
-
-<p>The optional <tt>!nontemporal</tt> metadata must reference a single
-   metatadata name &lt;index&gt; corresponding to a metadata node with
-   one <tt>i32</tt> entry of value 1.  The existence of
-   the <tt>!nontemporal</tt> metatadata on the instruction tells the optimizer
-   and code generator that this load is not expected to be reused in the cache.
-   The code generator may select special instructions to save cache bandwidth,
-   such as the <tt>MOVNT</tt> instruction on x86.</p>
-
-<p>The optional <tt>!invariant.load</tt> metadata must reference a single
-   metatadata name &lt;index&gt; corresponding to a metadata node with no
-   entries.  The existence of the <tt>!invariant.load</tt> metatadata on the
-   instruction tells the optimizer and code generator that this load address
-   points to memory which does not change value during program execution.
-   The optimizer may then move this load around, for example, by hoisting it
-   out of loops using loop invariant code motion.</p>
-
-<h5>Semantics:</h5>
-<p>The location of memory pointed to is loaded.  If the value being loaded is of
-   scalar type then the number of bytes read does not exceed the minimum number
-   of bytes needed to hold all bits of the type.  For example, loading an
-   <tt>i24</tt> reads at most three bytes.  When loading a value of a type like
-   <tt>i20</tt> with a size that is not an integral number of bytes, the result
-   is undefined if the value was not originally written using a store of the
-   same type.</p>
-
-<h5>Examples:</h5>
-<pre>
-  %ptr = <a href="#i_alloca">alloca</a> i32                               <i>; yields {i32*}:ptr</i>
-  <a href="#i_store">store</a> i32 3, i32* %ptr                          <i>; yields {void}</i>
-  %val = load i32* %ptr                           <i>; yields {i32}:val = i32 3</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_store">'<tt>store</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  store [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt;[, align &lt;alignment&gt;][, !nontemporal !&lt;index&gt;]        <i>; yields {void}</i>
-  store atomic [volatile] &lt;ty&gt; &lt;value&gt;, &lt;ty&gt;* &lt;pointer&gt; [singlethread] &lt;ordering&gt;, align &lt;alignment&gt;  <i>; yields {void}</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>store</tt>' instruction is used to write to memory.</p>
-
-<h5>Arguments:</h5>
-<p>There are two arguments to the '<tt>store</tt>' instruction: a value to store
-   and an address at which to store it.  The type of the
-   '<tt>&lt;pointer&gt;</tt>' operand must be a pointer to
-   the <a href="#t_firstclass">first class</a> type of the
-   '<tt>&lt;value&gt;</tt>' operand. If the <tt>store</tt> is marked as
-   <tt>volatile</tt>, then the optimizer is not allowed to modify the number or
-   order of execution of this <tt>store</tt> with other <a
-   href="#volatile">volatile operations</a>.</p>
-
-<p>If the <code>store</code> is marked as <code>atomic</code>, it takes an extra
-   <a href="#ordering">ordering</a> and optional <code>singlethread</code>
-   argument.  The <code>acquire</code> and <code>acq_rel</code> orderings aren't
-   valid on <code>store</code> instructions.  Atomic loads produce <a
-   href="#memorymodel">defined</a> results when they may see multiple atomic
-   stores. The type of the pointee must be an integer type whose bit width
-   is a power of two greater than or equal to eight and less than or equal
-   to a target-specific size limit. <code>align</code> must be explicitly
-   specified on atomic stores, and the store has undefined behavior if the
-   alignment is not set to a value which is at least the size in bytes of
-   the pointee. <code>!nontemporal</code> does not have any defined semantics
-   for atomic stores.</p>
-
-<p>The optional constant "align" argument specifies the alignment of the
-   operation (that is, the alignment of the memory address). A value of 0 or an
-   omitted "align" argument means that the operation has the abi
-   alignment for the target. It is the responsibility of the code emitter to
-   ensure that the alignment information is correct. Overestimating the
-   alignment results in an undefined behavior. Underestimating the alignment may
-   produce less efficient code. An alignment of 1 is always safe.</p>
-
-<p>The optional !nontemporal metadata must reference a single metatadata
-   name &lt;index&gt; corresponding to a metadata node with one i32 entry of
-   value 1.  The existence of the !nontemporal metatadata on the
-   instruction tells the optimizer and code generator that this load is
-   not expected to be reused in the cache.  The code generator may
-   select special instructions to save cache bandwidth, such as the
-   MOVNT instruction on x86.</p>
-
-
-<h5>Semantics:</h5>
-<p>The contents of memory are updated to contain '<tt>&lt;value&gt;</tt>' at the
-   location specified by the '<tt>&lt;pointer&gt;</tt>' operand.  If
-   '<tt>&lt;value&gt;</tt>' is of scalar type then the number of bytes written
-   does not exceed the minimum number of bytes needed to hold all bits of the
-   type.  For example, storing an <tt>i24</tt> writes at most three bytes.  When
-   writing a value of a type like <tt>i20</tt> with a size that is not an
-   integral number of bytes, it is unspecified what happens to the extra bits
-   that do not belong to the type, but they will typically be overwritten.</p>
-
-<h5>Example:</h5>
-<pre>
-  %ptr = <a href="#i_alloca">alloca</a> i32                               <i>; yields {i32*}:ptr</i>
-  store i32 3, i32* %ptr                          <i>; yields {void}</i>
-  %val = <a href="#i_load">load</a> i32* %ptr                           <i>; yields {i32}:val = i32 3</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-<a name="i_fence">'<tt>fence</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  fence [singlethread] &lt;ordering&gt;                   <i>; yields {void}</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fence</tt>' instruction is used to introduce happens-before edges
-between operations.</p>
-
-<h5>Arguments:</h5> <p>'<code>fence</code>' instructions take an <a
-href="#ordering">ordering</a> argument which defines what
-<i>synchronizes-with</i> edges they add.  They can only be given
-<code>acquire</code>, <code>release</code>, <code>acq_rel</code>, and
-<code>seq_cst</code> orderings.</p>
-
-<h5>Semantics:</h5>
-<p>A fence <var>A</var> which has (at least) <code>release</code> ordering
-semantics <i>synchronizes with</i> a fence <var>B</var> with (at least)
-<code>acquire</code> ordering semantics if and only if there exist atomic
-operations <var>X</var> and <var>Y</var>, both operating on some atomic object
-<var>M</var>, such that <var>A</var> is sequenced before <var>X</var>,
-<var>X</var> modifies <var>M</var> (either directly or through some side effect
-of a sequence headed by <var>X</var>), <var>Y</var> is sequenced before
-<var>B</var>, and <var>Y</var> observes <var>M</var>. This provides a
-<i>happens-before</i> dependency between <var>A</var> and <var>B</var>. Rather
-than an explicit <code>fence</code>, one (but not both) of the atomic operations
-<var>X</var> or <var>Y</var> might provide a <code>release</code> or
-<code>acquire</code> (resp.) ordering constraint and still
-<i>synchronize-with</i> the explicit <code>fence</code> and establish the
-<i>happens-before</i> edge.</p>
-
-<p>A <code>fence</code> which has <code>seq_cst</code> ordering, in addition to
-having both <code>acquire</code> and <code>release</code> semantics specified
-above, participates in the global program order of other <code>seq_cst</code>
-operations and/or fences.</p>
-
-<p>The optional "<a href="#singlethread"><code>singlethread</code></a>" argument
-specifies that the fence only synchronizes with other fences in the same
-thread.  (This is useful for interacting with signal handlers.)</p>
-
-<h5>Example:</h5>
-<pre>
-  fence acquire                          <i>; yields {void}</i>
-  fence singlethread seq_cst             <i>; yields {void}</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-<a name="i_cmpxchg">'<tt>cmpxchg</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  cmpxchg [volatile] &lt;ty&gt;* &lt;pointer&gt;, &lt;ty&gt; &lt;cmp&gt;, &lt;ty&gt; &lt;new&gt; [singlethread] &lt;ordering&gt;  <i>; yields {ty}</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>cmpxchg</tt>' instruction is used to atomically modify memory.
-It loads a value in memory and compares it to a given value. If they are
-equal, it stores a new value into the memory.</p>
-
-<h5>Arguments:</h5>
-<p>There are three arguments to the '<code>cmpxchg</code>' instruction: an
-address to operate on, a value to compare to the value currently be at that
-address, and a new value to place at that address if the compared values are
-equal.  The type of '<var>&lt;cmp&gt;</var>' must be an integer type whose
-bit width is a power of two greater than or equal to eight and less than
-or equal to a target-specific size limit. '<var>&lt;cmp&gt;</var>' and
-'<var>&lt;new&gt;</var>' must have the same type, and the type of
-'<var>&lt;pointer&gt;</var>' must be a pointer to that type. If the
-<code>cmpxchg</code> is marked as <code>volatile</code>, then the
-optimizer is not allowed to modify the number or order of execution
-of this <code>cmpxchg</code> with other <a href="#volatile">volatile
-operations</a>.</p>
-
-<!-- FIXME: Extend allowed types. -->
-
-<p>The <a href="#ordering"><var>ordering</var></a> argument specifies how this
-<code>cmpxchg</code> synchronizes with other atomic operations.</p>
-
-<p>The optional "<code>singlethread</code>" argument declares that the
-<code>cmpxchg</code> is only atomic with respect to code (usually signal
-handlers) running in the same thread as the <code>cmpxchg</code>.  Otherwise the
-cmpxchg is atomic with respect to all other code in the system.</p>
-
-<p>The pointer passed into cmpxchg must have alignment greater than or equal to
-the size in memory of the operand.
-
-<h5>Semantics:</h5>
-<p>The contents of memory at the location specified by the
-'<tt>&lt;pointer&gt;</tt>' operand is read and compared to
-'<tt>&lt;cmp&gt;</tt>'; if the read value is the equal,
-'<tt>&lt;new&gt;</tt>' is written.  The original value at the location
-is returned.
-
-<p>A successful <code>cmpxchg</code> is a read-modify-write instruction for the
-purpose of identifying <a href="#release_sequence">release sequences</a>.  A
-failed <code>cmpxchg</code> is equivalent to an atomic load with an ordering
-parameter determined by dropping any <code>release</code> part of the
-<code>cmpxchg</code>'s ordering.</p>
-
-<!--
-FIXME: Is compare_exchange_weak() necessary?  (Consider after we've done
-optimization work on ARM.)
-
-FIXME: Is a weaker ordering constraint on failure helpful in practice?
--->
-
-<h5>Example:</h5>
-<pre>
-entry:
-  %orig = atomic <a href="#i_load">load</a> i32* %ptr unordered                   <i>; yields {i32}</i>
-  <a href="#i_br">br</a> label %loop
-
-loop:
-  %cmp = <a href="#i_phi">phi</a> i32 [ %orig, %entry ], [%old, %loop]
-  %squared = <a href="#i_mul">mul</a> i32 %cmp, %cmp
-  %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared          <i>; yields {i32}</i>
-  %success = <a href="#i_icmp">icmp</a> eq i32 %cmp, %old
-  <a href="#i_br">br</a> i1 %success, label %done, label %loop
-
-done:
-  ...
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-<a name="i_atomicrmw">'<tt>atomicrmw</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  atomicrmw [volatile] &lt;operation&gt; &lt;ty&gt;* &lt;pointer&gt;, &lt;ty&gt; &lt;value&gt; [singlethread] &lt;ordering&gt;                   <i>; yields {ty}</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>atomicrmw</tt>' instruction is used to atomically modify memory.</p>
-
-<h5>Arguments:</h5>
-<p>There are three arguments to the '<code>atomicrmw</code>' instruction: an
-operation to apply, an address whose value to modify, an argument to the
-operation.  The operation must be one of the following keywords:</p>
-<ul>
-  <li>xchg</li>
-  <li>add</li>
-  <li>sub</li>
-  <li>and</li>
-  <li>nand</li>
-  <li>or</li>
-  <li>xor</li>
-  <li>max</li>
-  <li>min</li>
-  <li>umax</li>
-  <li>umin</li>
-</ul>
-
-<p>The type of '<var>&lt;value&gt;</var>' must be an integer type whose
-bit width is a power of two greater than or equal to eight and less than
-or equal to a target-specific size limit.  The type of the
-'<code>&lt;pointer&gt;</code>' operand must be a pointer to that type.
-If the <code>atomicrmw</code> is marked as <code>volatile</code>, then the
-optimizer is not allowed to modify the number or order of execution of this
-<code>atomicrmw</code> with other <a href="#volatile">volatile
-  operations</a>.</p>
-
-<!-- FIXME: Extend allowed types. -->
-
-<h5>Semantics:</h5>
-<p>The contents of memory at the location specified by the
-'<tt>&lt;pointer&gt;</tt>' operand are atomically read, modified, and written
-back.  The original value at the location is returned.  The modification is
-specified by the <var>operation</var> argument:</p>
-
-<ul>
-  <li>xchg: <code>*ptr = val</code></li>
-  <li>add: <code>*ptr = *ptr + val</code></li>
-  <li>sub: <code>*ptr = *ptr - val</code></li>
-  <li>and: <code>*ptr = *ptr &amp; val</code></li>
-  <li>nand: <code>*ptr = ~(*ptr &amp; val)</code></li>
-  <li>or: <code>*ptr = *ptr | val</code></li>
-  <li>xor: <code>*ptr = *ptr ^ val</code></li>
-  <li>max: <code>*ptr = *ptr &gt; val ? *ptr : val</code> (using a signed comparison)</li>
-  <li>min: <code>*ptr = *ptr &lt; val ? *ptr : val</code> (using a signed comparison)</li>
-  <li>umax: <code>*ptr = *ptr &gt; val ? *ptr : val</code> (using an unsigned comparison)</li>
-  <li>umin: <code>*ptr = *ptr &lt; val ? *ptr : val</code> (using an unsigned comparison)</li>
-</ul>
-
-<h5>Example:</h5>
-<pre>
-  %old = atomicrmw add i32* %ptr, i32 1 acquire                        <i>; yields {i32}</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_getelementptr">'<tt>getelementptr</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = getelementptr &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
-  &lt;result&gt; = getelementptr inbounds &lt;pty&gt;* &lt;ptrval&gt;{, &lt;ty&gt; &lt;idx&gt;}*
-  &lt;result&gt; = getelementptr &lt;ptr vector&gt; ptrval, &lt;vector index type&gt; idx
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>getelementptr</tt>' instruction is used to get the address of a
-   subelement of an <a href="#t_aggregate">aggregate</a> data structure.
-   It performs address calculation only and does not access memory.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is always a pointer or a vector of pointers,
-   and forms the basis of the
-   calculation. The remaining arguments are indices that indicate which of the
-   elements of the aggregate object are indexed. The interpretation of each
-   index is dependent on the type being indexed into. The first index always
-   indexes the pointer value given as the first argument, the second index
-   indexes a value of the type pointed to (not necessarily the value directly
-   pointed to, since the first index can be non-zero), etc. The first type
-   indexed into must be a pointer value, subsequent types can be arrays,
-   vectors, and structs. Note that subsequent types being indexed into
-   can never be pointers, since that would require loading the pointer before
-   continuing calculation.</p>
-
-<p>The type of each index argument depends on the type it is indexing into.
-   When indexing into a (optionally packed) structure, only <tt>i32</tt>
-   integer <b>constants</b> are allowed (when using a vector of indices they
-   must all be the <b>same</b> <tt>i32</tt> integer constant).  When indexing
-   into an array, pointer or vector, integers of any width are allowed, and
-   they are not required to be constant.  These integers are treated as signed
-   values where relevant.</p>
-
-<p>For example, let's consider a C code fragment and how it gets compiled to
-   LLVM:</p>
-
-<pre class="doc_code">
-struct RT {
-  char A;
-  int B[10][20];
-  char C;
-};
-struct ST {
-  int X;
-  double Y;
-  struct RT Z;
-};
-
-int *foo(struct ST *s) {
-  return &amp;s[1].Z.B[5][13];
-}
-</pre>
-
-<p>The LLVM code generated by Clang is:</p>
-
-<pre class="doc_code">
-%struct.RT = <a href="#namedtypes">type</a> { i8, [10 x [20 x i32]], i8 }
-%struct.ST = <a href="#namedtypes">type</a> { i32, double, %struct.RT }
-
-define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp {
-entry:
-  %arrayidx = getelementptr inbounds %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13
-  ret i32* %arrayidx
-}
-</pre>
-
-<h5>Semantics:</h5>
-<p>In the example above, the first index is indexing into the
-   '<tt>%struct.ST*</tt>' type, which is a pointer, yielding a
-   '<tt>%struct.ST</tt>' = '<tt>{ i32, double, %struct.RT }</tt>' type, a
-   structure. The second index indexes into the third element of the structure,
-   yielding a '<tt>%struct.RT</tt>' = '<tt>{ i8 , [10 x [20 x i32]], i8 }</tt>'
-   type, another structure. The third index indexes into the second element of
-   the structure, yielding a '<tt>[10 x [20 x i32]]</tt>' type, an array. The
-   two dimensions of the array are subscripted into, yielding an '<tt>i32</tt>'
-   type. The '<tt>getelementptr</tt>' instruction returns a pointer to this
-   element, thus computing a value of '<tt>i32*</tt>' type.</p>
-
-<p>Note that it is perfectly legal to index partially through a structure,
-   returning a pointer to an inner element.  Because of this, the LLVM code for
-   the given testcase is equivalent to:</p>
-
-<pre class="doc_code">
-define i32* @foo(%struct.ST* %s) {
-  %t1 = getelementptr %struct.ST* %s, i32 1                 <i>; yields %struct.ST*:%t1</i>
-  %t2 = getelementptr %struct.ST* %t1, i32 0, i32 2         <i>; yields %struct.RT*:%t2</i>
-  %t3 = getelementptr %struct.RT* %t2, i32 0, i32 1         <i>; yields [10 x [20 x i32]]*:%t3</i>
-  %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5  <i>; yields [20 x i32]*:%t4</i>
-  %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13        <i>; yields i32*:%t5</i>
-  ret i32* %t5
-}
-</pre>
-
-<p>If the <tt>inbounds</tt> keyword is present, the result value of the
-   <tt>getelementptr</tt> is a <a href="#poisonvalues">poison value</a> if the
-   base pointer is not an <i>in bounds</i> address of an allocated object,
-   or if any of the addresses that would be formed by successive addition of
-   the offsets implied by the indices to the base address with infinitely
-   precise signed arithmetic are not an <i>in bounds</i> address of that
-   allocated object. The <i>in bounds</i> addresses for an allocated object
-   are all the addresses that point into the object, plus the address one
-   byte past the end.
-   In cases where the base is a vector of pointers the <tt>inbounds</tt> keyword
-   applies to each of the computations element-wise. </p>
-
-<p>If the <tt>inbounds</tt> keyword is not present, the offsets are added to
-   the base address with silently-wrapping two's complement arithmetic. If the
-   offsets have a different width from the pointer, they are sign-extended or
-   truncated to the width of the pointer. The result value of the
-   <tt>getelementptr</tt> may be outside the object pointed to by the base
-   pointer. The result value may not necessarily be used to access memory
-   though, even if it happens to point into allocated storage. See the
-   <a href="#pointeraliasing">Pointer Aliasing Rules</a> section for more
-   information.</p>
-
-<p>The getelementptr instruction is often confusing.  For some more insight into
-   how it works, see <a href="GetElementPtr.html">the getelementptr FAQ</a>.</p>
-
-<h5>Example:</h5>
-<pre>
-    <i>; yields [12 x i8]*:aptr</i>
-    %aptr = getelementptr {i32, [12 x i8]}* %saptr, i64 0, i32 1
-    <i>; yields i8*:vptr</i>
-    %vptr = getelementptr {i32, &lt;2 x i8&gt;}* %svptr, i64 0, i32 1, i32 1
-    <i>; yields i8*:eptr</i>
-    %eptr = getelementptr [12 x i8]* %aptr, i64 0, i32 1
-    <i>; yields i32*:iptr</i>
-    %iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0
-</pre>
-
-<p>In cases where the pointer argument is a vector of pointers, each index must
-   be a vector with the same number of elements.  For example: </p>
-<pre class="doc_code">
- %A = getelementptr <4 x i8*> %ptrs, <4 x i64> %offsets,
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="convertops">Conversion Operations</a>
-</h3>
-
-<div>
-
-<p>The instructions in this category are the conversion instructions (casting)
-   which all take a single operand and a type. They perform various bit
-   conversions on the operand.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_trunc">'<tt>trunc .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = trunc &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt;             <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>trunc</tt>' instruction truncates its operand to the
-   type <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>trunc</tt>' instruction takes a value to trunc, and a type to trunc it to.
-   Both types must be of <a href="#t_integer">integer</a> types, or vectors
-   of the same number of integers.
-   The bit size of the <tt>value</tt> must be larger than
-   the bit size of the destination type, <tt>ty2</tt>.
-   Equal sized types are not allowed.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>trunc</tt>' instruction truncates the high order bits
-   in <tt>value</tt> and converts the remaining bits to <tt>ty2</tt>. Since the
-   source size must be larger than the destination size, <tt>trunc</tt> cannot
-   be a <i>no-op cast</i>.  It will always truncate bits.</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = trunc i32 257 to i8                        <i>; yields i8:1</i>
-  %Y = trunc i32 123 to i1                        <i>; yields i1:true</i>
-  %Z = trunc i32 122 to i1                        <i>; yields i1:false</i>
-  %W = trunc &lt;2 x i16&gt; &lt;i16 8, i16 7&gt; to &lt;2 x i8&gt; <i>; yields &lt;i8 8, i8 7&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_zext">'<tt>zext .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = zext &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt;             <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>zext</tt>' instruction zero extends its operand to type
-   <tt>ty2</tt>.</p>
-
-
-<h5>Arguments:</h5>
-<p>The '<tt>zext</tt>' instruction takes a value to cast, and a type to cast it to.
-   Both types must be of <a href="#t_integer">integer</a> types, or vectors
-   of the same number of integers.
-   The bit size of the <tt>value</tt> must be smaller than
-   the bit size of the destination type,
-   <tt>ty2</tt>.</p>
-
-<h5>Semantics:</h5>
-<p>The <tt>zext</tt> fills the high order bits of the <tt>value</tt> with zero
-   bits until it reaches the size of the destination type, <tt>ty2</tt>.</p>
-
-<p>When zero extending from i1, the result will always be either 0 or 1.</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = zext i32 257 to i64              <i>; yields i64:257</i>
-  %Y = zext i1 true to i32              <i>; yields i32:1</i>
-  %Z = zext &lt;2 x i16&gt; &lt;i16 8, i16 7&gt; to &lt;2 x i32&gt; <i>; yields &lt;i32 8, i32 7&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_sext">'<tt>sext .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = sext &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt;             <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>sext</tt>' sign extends <tt>value</tt> to the type <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>sext</tt>' instruction takes a value to cast, and a type to cast it to.
-   Both types must be of <a href="#t_integer">integer</a> types, or vectors
-   of the same number of integers.
-   The bit size of the <tt>value</tt> must be smaller than
-   the bit size of the destination type,
-   <tt>ty2</tt>.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>sext</tt>' instruction performs a sign extension by copying the sign
-   bit (highest order bit) of the <tt>value</tt> until it reaches the bit size
-   of the type <tt>ty2</tt>.</p>
-
-<p>When sign extending from i1, the extension always results in -1 or 0.</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = sext i8  -1 to i16              <i>; yields i16   :65535</i>
-  %Y = sext i1 true to i32             <i>; yields i32:-1</i>
-  %Z = sext &lt;2 x i16&gt; &lt;i16 8, i16 7&gt; to &lt;2 x i32&gt; <i>; yields &lt;i32 8, i32 7&gt;</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_fptrunc">'<tt>fptrunc .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = fptrunc &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt;             <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fptrunc</tt>' instruction truncates <tt>value</tt> to type
-   <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>fptrunc</tt>' instruction takes a <a href="#t_floating">floating
-   point</a> value to cast and a <a href="#t_floating">floating point</a> type
-   to cast it to. The size of <tt>value</tt> must be larger than the size of
-   <tt>ty2</tt>. This implies that <tt>fptrunc</tt> cannot be used to make a
-   <i>no-op cast</i>.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>fptrunc</tt>' instruction truncates a <tt>value</tt> from a larger
-   <a href="#t_floating">floating point</a> type to a smaller
-   <a href="#t_floating">floating point</a> type.  If the value cannot fit
-   within the destination type, <tt>ty2</tt>, then the results are
-   undefined.</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = fptrunc double 123.0 to float         <i>; yields float:123.0</i>
-  %Y = fptrunc double 1.0E+300 to float      <i>; yields undefined</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_fpext">'<tt>fpext .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = fpext &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt;             <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fpext</tt>' extends a floating point <tt>value</tt> to a larger
-   floating point value.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>fpext</tt>' instruction takes a
-   <a href="#t_floating">floating point</a> <tt>value</tt> to cast, and
-   a <a href="#t_floating">floating point</a> type to cast it to. The source
-   type must be smaller than the destination type.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>fpext</tt>' instruction extends the <tt>value</tt> from a smaller
-   <a href="#t_floating">floating point</a> type to a larger
-   <a href="#t_floating">floating point</a> type. The <tt>fpext</tt> cannot be
-   used to make a <i>no-op cast</i> because it always changes bits. Use
-   <tt>bitcast</tt> to make a <i>no-op cast</i> for a floating point cast.</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = fpext float 3.125 to double         <i>; yields double:3.125000e+00</i>
-  %Y = fpext double %X to fp128            <i>; yields fp128:0xL00000000000000004000900000000000</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_fptoui">'<tt>fptoui .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = fptoui &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt;             <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fptoui</tt>' converts a floating point <tt>value</tt> to its
-   unsigned integer equivalent of type <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>fptoui</tt>' instruction takes a value to cast, which must be a
-   scalar or vector <a href="#t_floating">floating point</a> value, and a type
-   to cast it to <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a>
-   type. If <tt>ty</tt> is a vector floating point type, <tt>ty2</tt> must be a
-   vector integer type with the same number of elements as <tt>ty</tt></p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>fptoui</tt>' instruction converts its
-   <a href="#t_floating">floating point</a> operand into the nearest (rounding
-   towards zero) unsigned integer value. If the value cannot fit
-   in <tt>ty2</tt>, the results are undefined.</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = fptoui double 123.0 to i32      <i>; yields i32:123</i>
-  %Y = fptoui float 1.0E+300 to i1     <i>; yields undefined:1</i>
-  %Z = fptoui float 1.04E+17 to i8     <i>; yields undefined:1</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_fptosi">'<tt>fptosi .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = fptosi &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt;             <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fptosi</tt>' instruction converts
-   <a href="#t_floating">floating point</a> <tt>value</tt> to
-   type <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>fptosi</tt>' instruction takes a value to cast, which must be a
-   scalar or vector <a href="#t_floating">floating point</a> value, and a type
-   to cast it to <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a>
-   type. If <tt>ty</tt> is a vector floating point type, <tt>ty2</tt> must be a
-   vector integer type with the same number of elements as <tt>ty</tt></p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>fptosi</tt>' instruction converts its
-   <a href="#t_floating">floating point</a> operand into the nearest (rounding
-   towards zero) signed integer value. If the value cannot fit in <tt>ty2</tt>,
-   the results are undefined.</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = fptosi double -123.0 to i32      <i>; yields i32:-123</i>
-  %Y = fptosi float 1.0E-247 to i1      <i>; yields undefined:1</i>
-  %Z = fptosi float 1.04E+17 to i8      <i>; yields undefined:1</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_uitofp">'<tt>uitofp .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = uitofp &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt;             <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>uitofp</tt>' instruction regards <tt>value</tt> as an unsigned
-   integer and converts that value to the <tt>ty2</tt> type.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>uitofp</tt>' instruction takes a value to cast, which must be a
-   scalar or vector <a href="#t_integer">integer</a> value, and a type to cast
-   it to <tt>ty2</tt>, which must be an <a href="#t_floating">floating point</a>
-   type. If <tt>ty</tt> is a vector integer type, <tt>ty2</tt> must be a vector
-   floating point type with the same number of elements as <tt>ty</tt></p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>uitofp</tt>' instruction interprets its operand as an unsigned
-   integer quantity and converts it to the corresponding floating point
-   value. If the value cannot fit in the floating point value, the results are
-   undefined.</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = uitofp i32 257 to float         <i>; yields float:257.0</i>
-  %Y = uitofp i8 -1 to double          <i>; yields double:255.0</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_sitofp">'<tt>sitofp .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = sitofp &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt;             <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>sitofp</tt>' instruction regards <tt>value</tt> as a signed integer
-   and converts that value to the <tt>ty2</tt> type.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>sitofp</tt>' instruction takes a value to cast, which must be a
-   scalar or vector <a href="#t_integer">integer</a> value, and a type to cast
-   it to <tt>ty2</tt>, which must be an <a href="#t_floating">floating point</a>
-   type. If <tt>ty</tt> is a vector integer type, <tt>ty2</tt> must be a vector
-   floating point type with the same number of elements as <tt>ty</tt></p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>sitofp</tt>' instruction interprets its operand as a signed integer
-   quantity and converts it to the corresponding floating point value. If the
-   value cannot fit in the floating point value, the results are undefined.</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = sitofp i32 257 to float         <i>; yields float:257.0</i>
-  %Y = sitofp i8 -1 to double          <i>; yields double:-1.0</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_ptrtoint">'<tt>ptrtoint .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = ptrtoint &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt;             <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>ptrtoint</tt>' instruction converts the pointer or a vector of
-   pointers <tt>value</tt> to
-   the integer (or vector of integers) type <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>ptrtoint</tt>' instruction takes a <tt>value</tt> to cast, which
-   must be a a value of type <a href="#t_pointer">pointer</a> or a vector of
-    pointers, and a type to cast it to
-   <tt>ty2</tt>, which must be an <a href="#t_integer">integer</a> or a vector
-   of integers type.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>ptrtoint</tt>' instruction converts <tt>value</tt> to integer type
-   <tt>ty2</tt> by interpreting the pointer value as an integer and either
-   truncating or zero extending that value to the size of the integer type. If
-   <tt>value</tt> is smaller than <tt>ty2</tt> then a zero extension is done. If
-   <tt>value</tt> is larger than <tt>ty2</tt> then a truncation is done. If they
-   are the same size, then nothing is done (<i>no-op cast</i>) other than a type
-   change.</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = ptrtoint i32* %P to i8                         <i>; yields truncation on 32-bit architecture</i>
-  %Y = ptrtoint i32* %P to i64                        <i>; yields zero extension on 32-bit architecture</i>
-  %Z = ptrtoint &lt;4 x i32*&gt; %P to &lt;4 x i64&gt;<i>; yields vector zero extension for a vector of addresses on 32-bit architecture</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_inttoptr">'<tt>inttoptr .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = inttoptr &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt;             <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>inttoptr</tt>' instruction converts an integer <tt>value</tt> to a
-   pointer type, <tt>ty2</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>inttoptr</tt>' instruction takes an <a href="#t_integer">integer</a>
-   value to cast, and a type to cast it to, which must be a
-   <a href="#t_pointer">pointer</a> type.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>inttoptr</tt>' instruction converts <tt>value</tt> to type
-   <tt>ty2</tt> by applying either a zero extension or a truncation depending on
-   the size of the integer <tt>value</tt>. If <tt>value</tt> is larger than the
-   size of a pointer then a truncation is done. If <tt>value</tt> is smaller
-   than the size of a pointer then a zero extension is done. If they are the
-   same size, nothing is done (<i>no-op cast</i>).</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = inttoptr i32 255 to i32*          <i>; yields zero extension on 64-bit architecture</i>
-  %Y = inttoptr i32 255 to i32*          <i>; yields no-op on 32-bit architecture</i>
-  %Z = inttoptr i64 0 to i32*            <i>; yields truncation on 32-bit architecture</i>
-  %Z = inttoptr &lt;4 x i32&gt; %G to &lt;4 x i8*&gt;<i>; yields truncation of vector G to four pointers</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_bitcast">'<tt>bitcast .. to</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = bitcast &lt;ty&gt; &lt;value&gt; to &lt;ty2&gt;             <i>; yields ty2</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>bitcast</tt>' instruction converts <tt>value</tt> to type
-   <tt>ty2</tt> without changing any bits.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>bitcast</tt>' instruction takes a value to cast, which must be a
-   non-aggregate first class value, and a type to cast it to, which must also be
-   a non-aggregate <a href="#t_firstclass">first class</a> type. The bit sizes
-   of <tt>value</tt> and the destination type, <tt>ty2</tt>, must be
-   identical. If the source type is a pointer, the destination type must also be
-   a pointer.  This instruction supports bitwise conversion of vectors to
-   integers and to vectors of other types (as long as they have the same
-   size).</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>bitcast</tt>' instruction converts <tt>value</tt> to type
-   <tt>ty2</tt>. It is always a <i>no-op cast</i> because no bits change with
-   this conversion.  The conversion is done as if the <tt>value</tt> had been
-   stored to memory and read back as type <tt>ty2</tt>.
-   Pointer (or vector of pointers) types may only be converted to other pointer
-   (or vector of pointers) types with this instruction. To convert
-   pointers to other types, use the <a href="#i_inttoptr">inttoptr</a> or
-   <a href="#i_ptrtoint">ptrtoint</a> instructions first.</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = bitcast i8 255 to i8              <i>; yields i8 :-1</i>
-  %Y = bitcast i32* %x to sint*          <i>; yields sint*:%x</i>
-  %Z = bitcast &lt;2 x int&gt; %V to i64;        <i>; yields i64: %V</i>
-  %Z = bitcast &lt;2 x i32*&gt; %V to &lt;2 x i64*&gt; <i>; yields &lt;2 x i64*&gt;</i>
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="otherops">Other Operations</a>
-</h3>
-
-<div>
-
-<p>The instructions in this category are the "miscellaneous" instructions, which
-   defy better classification.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_icmp">'<tt>icmp</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = icmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;   <i>; yields {i1} or {&lt;N x i1&gt;}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>icmp</tt>' instruction returns a boolean value or a vector of
-   boolean values based on comparison of its two integer, integer vector,
-   pointer, or pointer vector operands.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>icmp</tt>' instruction takes three operands. The first operand is
-   the condition code indicating the kind of comparison to perform. It is not a
-   value, just a keyword. The possible condition code are:</p>
-
-<ol>
-  <li><tt>eq</tt>: equal</li>
-  <li><tt>ne</tt>: not equal </li>
-  <li><tt>ugt</tt>: unsigned greater than</li>
-  <li><tt>uge</tt>: unsigned greater or equal</li>
-  <li><tt>ult</tt>: unsigned less than</li>
-  <li><tt>ule</tt>: unsigned less or equal</li>
-  <li><tt>sgt</tt>: signed greater than</li>
-  <li><tt>sge</tt>: signed greater or equal</li>
-  <li><tt>slt</tt>: signed less than</li>
-  <li><tt>sle</tt>: signed less or equal</li>
-</ol>
-
-<p>The remaining two arguments must be <a href="#t_integer">integer</a> or
-   <a href="#t_pointer">pointer</a> or integer <a href="#t_vector">vector</a>
-   typed.  They must also be identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>icmp</tt>' compares <tt>op1</tt> and <tt>op2</tt> according to the
-   condition code given as <tt>cond</tt>. The comparison performed always yields
-   either an <a href="#t_integer"><tt>i1</tt></a> or vector of <tt>i1</tt>
-   result, as follows:</p>
-
-<ol>
-  <li><tt>eq</tt>: yields <tt>true</tt> if the operands are equal,
-      <tt>false</tt> otherwise. No sign interpretation is necessary or
-      performed.</li>
-
-  <li><tt>ne</tt>: yields <tt>true</tt> if the operands are unequal,
-      <tt>false</tt> otherwise. No sign interpretation is necessary or
-      performed.</li>
-
-  <li><tt>ugt</tt>: interprets the operands as unsigned values and yields
-      <tt>true</tt> if <tt>op1</tt> is greater than <tt>op2</tt>.</li>
-
-  <li><tt>uge</tt>: interprets the operands as unsigned values and yields
-      <tt>true</tt> if <tt>op1</tt> is greater than or equal
-      to <tt>op2</tt>.</li>
-
-  <li><tt>ult</tt>: interprets the operands as unsigned values and yields
-      <tt>true</tt> if <tt>op1</tt> is less than <tt>op2</tt>.</li>
-
-  <li><tt>ule</tt>: interprets the operands as unsigned values and yields
-      <tt>true</tt> if <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
-
-  <li><tt>sgt</tt>: interprets the operands as signed values and yields
-      <tt>true</tt> if <tt>op1</tt> is greater than <tt>op2</tt>.</li>
-
-  <li><tt>sge</tt>: interprets the operands as signed values and yields
-      <tt>true</tt> if <tt>op1</tt> is greater than or equal
-      to <tt>op2</tt>.</li>
-
-  <li><tt>slt</tt>: interprets the operands as signed values and yields
-      <tt>true</tt> if <tt>op1</tt> is less than <tt>op2</tt>.</li>
-
-  <li><tt>sle</tt>: interprets the operands as signed values and yields
-      <tt>true</tt> if <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
-</ol>
-
-<p>If the operands are <a href="#t_pointer">pointer</a> typed, the pointer
-   values are compared as if they were integers.</p>
-
-<p>If the operands are integer vectors, then they are compared element by
-   element. The result is an <tt>i1</tt> vector with the same number of elements
-   as the values being compared.  Otherwise, the result is an <tt>i1</tt>.</p>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = icmp eq i32 4, 5          <i>; yields: result=false</i>
-  &lt;result&gt; = icmp ne float* %X, %X     <i>; yields: result=false</i>
-  &lt;result&gt; = icmp ult i16  4, 5        <i>; yields: result=true</i>
-  &lt;result&gt; = icmp sgt i16  4, 5        <i>; yields: result=false</i>
-  &lt;result&gt; = icmp ule i16 -4, 5        <i>; yields: result=false</i>
-  &lt;result&gt; = icmp sge i16  4, 5        <i>; yields: result=false</i>
-</pre>
-
-<p>Note that the code generator does not yet support vector types with
-   the <tt>icmp</tt> instruction.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_fcmp">'<tt>fcmp</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = fcmp &lt;cond&gt; &lt;ty&gt; &lt;op1&gt;, &lt;op2&gt;     <i>; yields {i1} or {&lt;N x i1&gt;}:result</i>
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>fcmp</tt>' instruction returns a boolean value or vector of boolean
-   values based on comparison of its operands.</p>
-
-<p>If the operands are floating point scalars, then the result type is a boolean
-(<a href="#t_integer"><tt>i1</tt></a>).</p>
-
-<p>If the operands are floating point vectors, then the result type is a vector
-   of boolean with the same number of elements as the operands being
-   compared.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>fcmp</tt>' instruction takes three operands. The first operand is
-   the condition code indicating the kind of comparison to perform. It is not a
-   value, just a keyword. The possible condition code are:</p>
-
-<ol>
-  <li><tt>false</tt>: no comparison, always returns false</li>
-  <li><tt>oeq</tt>: ordered and equal</li>
-  <li><tt>ogt</tt>: ordered and greater than </li>
-  <li><tt>oge</tt>: ordered and greater than or equal</li>
-  <li><tt>olt</tt>: ordered and less than </li>
-  <li><tt>ole</tt>: ordered and less than or equal</li>
-  <li><tt>one</tt>: ordered and not equal</li>
-  <li><tt>ord</tt>: ordered (no nans)</li>
-  <li><tt>ueq</tt>: unordered or equal</li>
-  <li><tt>ugt</tt>: unordered or greater than </li>
-  <li><tt>uge</tt>: unordered or greater than or equal</li>
-  <li><tt>ult</tt>: unordered or less than </li>
-  <li><tt>ule</tt>: unordered or less than or equal</li>
-  <li><tt>une</tt>: unordered or not equal</li>
-  <li><tt>uno</tt>: unordered (either nans)</li>
-  <li><tt>true</tt>: no comparison, always returns true</li>
-</ol>
-
-<p><i>Ordered</i> means that neither operand is a QNAN while
-   <i>unordered</i> means that either operand may be a QNAN.</p>
-
-<p>Each of <tt>val1</tt> and <tt>val2</tt> arguments must be either
-   a <a href="#t_floating">floating point</a> type or
-   a <a href="#t_vector">vector</a> of floating point type.  They must have
-   identical types.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>fcmp</tt>' instruction compares <tt>op1</tt> and <tt>op2</tt>
-   according to the condition code given as <tt>cond</tt>.  If the operands are
-   vectors, then the vectors are compared element by element.  Each comparison
-   performed always yields an <a href="#t_integer">i1</a> result, as
-   follows:</p>
-
-<ol>
-  <li><tt>false</tt>: always yields <tt>false</tt>, regardless of operands.</li>
-
-  <li><tt>oeq</tt>: yields <tt>true</tt> if both operands are not a QNAN and
-      <tt>op1</tt> is equal to <tt>op2</tt>.</li>
-
-  <li><tt>ogt</tt>: yields <tt>true</tt> if both operands are not a QNAN and
-      <tt>op1</tt> is greater than <tt>op2</tt>.</li>
-
-  <li><tt>oge</tt>: yields <tt>true</tt> if both operands are not a QNAN and
-      <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
-
-  <li><tt>olt</tt>: yields <tt>true</tt> if both operands are not a QNAN and
-      <tt>op1</tt> is less than <tt>op2</tt>.</li>
-
-  <li><tt>ole</tt>: yields <tt>true</tt> if both operands are not a QNAN and
-      <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
-
-  <li><tt>one</tt>: yields <tt>true</tt> if both operands are not a QNAN and
-      <tt>op1</tt> is not equal to <tt>op2</tt>.</li>
-
-  <li><tt>ord</tt>: yields <tt>true</tt> if both operands are not a QNAN.</li>
-
-  <li><tt>ueq</tt>: yields <tt>true</tt> if either operand is a QNAN or
-      <tt>op1</tt> is equal to <tt>op2</tt>.</li>
-
-  <li><tt>ugt</tt>: yields <tt>true</tt> if either operand is a QNAN or
-      <tt>op1</tt> is greater than <tt>op2</tt>.</li>
-
-  <li><tt>uge</tt>: yields <tt>true</tt> if either operand is a QNAN or
-      <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
-
-  <li><tt>ult</tt>: yields <tt>true</tt> if either operand is a QNAN or
-      <tt>op1</tt> is less than <tt>op2</tt>.</li>
-
-  <li><tt>ule</tt>: yields <tt>true</tt> if either operand is a QNAN or
-      <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
-
-  <li><tt>une</tt>: yields <tt>true</tt> if either operand is a QNAN or
-      <tt>op1</tt> is not equal to <tt>op2</tt>.</li>
-
-  <li><tt>uno</tt>: yields <tt>true</tt> if either operand is a QNAN.</li>
-
-  <li><tt>true</tt>: always yields <tt>true</tt>, regardless of operands.</li>
-</ol>
-
-<h5>Example:</h5>
-<pre>
-  &lt;result&gt; = fcmp oeq float 4.0, 5.0    <i>; yields: result=false</i>
-  &lt;result&gt; = fcmp one float 4.0, 5.0    <i>; yields: result=true</i>
-  &lt;result&gt; = fcmp olt float 4.0, 5.0    <i>; yields: result=true</i>
-  &lt;result&gt; = fcmp ueq double 1.0, 2.0   <i>; yields: result=false</i>
-</pre>
-
-<p>Note that the code generator does not yet support vector types with
-   the <tt>fcmp</tt> instruction.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_phi">'<tt>phi</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = phi &lt;ty&gt; [ &lt;val0&gt;, &lt;label0&gt;], ...
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>phi</tt>' instruction is used to implement the &#966; node in the
-   SSA graph representing the function.</p>
-
-<h5>Arguments:</h5>
-<p>The type of the incoming values is specified with the first type field. After
-   this, the '<tt>phi</tt>' instruction takes a list of pairs as arguments, with
-   one pair for each predecessor basic block of the current block.  Only values
-   of <a href="#t_firstclass">first class</a> type may be used as the value
-   arguments to the PHI node.  Only labels may be used as the label
-   arguments.</p>
-
-<p>There must be no non-phi instructions between the start of a basic block and
-   the PHI instructions: i.e. PHI instructions must be first in a basic
-   block.</p>
-
-<p>For the purposes of the SSA form, the use of each incoming value is deemed to
-   occur on the edge from the corresponding predecessor block to the current
-   block (but after any definition of an '<tt>invoke</tt>' instruction's return
-   value on the same edge).</p>
-
-<h5>Semantics:</h5>
-<p>At runtime, the '<tt>phi</tt>' instruction logically takes on the value
-   specified by the pair corresponding to the predecessor basic block that
-   executed just prior to the current block.</p>
-
-<h5>Example:</h5>
-<pre>
-Loop:       ; Infinite loop that counts from 0 on up...
-  %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ]
-  %nextindvar = add i32 %indvar, 1
-  br label %Loop
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-   <a name="i_select">'<tt>select</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = select <i>selty</i> &lt;cond&gt;, &lt;ty&gt; &lt;val1&gt;, &lt;ty&gt; &lt;val2&gt;             <i>; yields ty</i>
-
-  <i>selty</i> is either i1 or {&lt;N x i1&gt;}
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>select</tt>' instruction is used to choose one value based on a
-   condition, without branching.</p>
-
-
-<h5>Arguments:</h5>
-<p>The '<tt>select</tt>' instruction requires an 'i1' value or a vector of 'i1'
-   values indicating the condition, and two values of the
-   same <a href="#t_firstclass">first class</a> type.  If the val1/val2 are
-   vectors and the condition is a scalar, then entire vectors are selected, not
-   individual elements.</p>
-
-<h5>Semantics:</h5>
-<p>If the condition is an i1 and it evaluates to 1, the instruction returns the
-   first value argument; otherwise, it returns the second value argument.</p>
-
-<p>If the condition is a vector of i1, then the value arguments must be vectors
-   of the same size, and the selection is done element by element.</p>
-
-<h5>Example:</h5>
-<pre>
-  %X = select i1 true, i8 17, i8 42          <i>; yields i8:17</i>
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_call">'<tt>call</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;result&gt; = [tail] call [<a href="#callingconv">cconv</a>] [<a href="#paramattrs">ret attrs</a>] &lt;ty&gt; [&lt;fnty&gt;*] &lt;fnptrval&gt;(&lt;function args&gt;) [<a href="#fnattrs">fn attrs</a>]
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>call</tt>' instruction represents a simple function call.</p>
-
-<h5>Arguments:</h5>
-<p>This instruction requires several arguments:</p>
-
-<ol>
-  <li>The optional "tail" marker indicates that the callee function does not
-      access any allocas or varargs in the caller.  Note that calls may be
-      marked "tail" even if they do not occur before
-      a <a href="#i_ret"><tt>ret</tt></a> instruction.  If the "tail" marker is
-      present, the function call is eligible for tail call optimization,
-      but <a href="CodeGenerator.html#tailcallopt">might not in fact be
-      optimized into a jump</a>.  The code generator may optimize calls marked
-      "tail" with either 1) automatic <a href="CodeGenerator.html#sibcallopt">
-      sibling call optimization</a> when the caller and callee have
-      matching signatures, or 2) forced tail call optimization when the
-      following extra requirements are met:
-      <ul>
-        <li>Caller and callee both have the calling
-            convention <tt>fastcc</tt>.</li>
-        <li>The call is in tail position (ret immediately follows call and ret
-            uses value of call or is void).</li>
-        <li>Option <tt>-tailcallopt</tt> is enabled,
-            or <code>llvm::GuaranteedTailCallOpt</code> is <code>true</code>.</li>
-        <li><a href="CodeGenerator.html#tailcallopt">Platform specific
-            constraints are met.</a></li>
-      </ul>
-  </li>
-
-  <li>The optional "cconv" marker indicates which <a href="#callingconv">calling
-      convention</a> the call should use.  If none is specified, the call
-      defaults to using C calling conventions.  The calling convention of the
-      call must match the calling convention of the target function, or else the
-      behavior is undefined.</li>
-
-  <li>The optional <a href="#paramattrs">Parameter Attributes</a> list for
-      return values. Only '<tt>zeroext</tt>', '<tt>signext</tt>', and
-      '<tt>inreg</tt>' attributes are valid here.</li>
-
-  <li>'<tt>ty</tt>': the type of the call instruction itself which is also the
-      type of the return value.  Functions that return no value are marked
-      <tt><a href="#t_void">void</a></tt>.</li>
-
-  <li>'<tt>fnty</tt>': shall be the signature of the pointer to function value
-      being invoked.  The argument types must match the types implied by this
-      signature.  This type can be omitted if the function is not varargs and if
-      the function type does not return a pointer to a function.</li>
-
-  <li>'<tt>fnptrval</tt>': An LLVM value containing a pointer to a function to
-      be invoked. In most cases, this is a direct function invocation, but
-      indirect <tt>call</tt>s are just as possible, calling an arbitrary pointer
-      to function value.</li>
-
-  <li>'<tt>function args</tt>': argument list whose types match the function
-      signature argument types and parameter attributes. All arguments must be
-      of <a href="#t_firstclass">first class</a> type. If the function
-      signature indicates the function accepts a variable number of arguments,
-      the extra arguments can be specified.</li>
-
-  <li>The optional <a href="#fnattrs">function attributes</a> list. Only
-      '<tt>noreturn</tt>', '<tt>nounwind</tt>', '<tt>readonly</tt>' and
-      '<tt>readnone</tt>' attributes are valid here.</li>
-</ol>
-
-<h5>Semantics:</h5>
-<p>The '<tt>call</tt>' instruction is used to cause control flow to transfer to
-   a specified function, with its incoming arguments bound to the specified
-   values. Upon a '<tt><a href="#i_ret">ret</a></tt>' instruction in the called
-   function, control flow continues with the instruction after the function
-   call, and the return value of the function is bound to the result
-   argument.</p>
-
-<h5>Example:</h5>
-<pre>
-  %retval = call i32 @test(i32 %argc)
-  call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42)        <i>; yields i32</i>
-  %X = tail call i32 @foo()                                    <i>; yields i32</i>
-  %Y = tail call <a href="#callingconv">fastcc</a> i32 @foo()  <i>; yields i32</i>
-  call void %foo(i8 97 signext)
-
-  %struct.A = type { i32, i8 }
-  %r = call %struct.A @foo()                        <i>; yields { 32, i8 }</i>
-  %gr = extractvalue %struct.A %r, 0                <i>; yields i32</i>
-  %gr1 = extractvalue %struct.A %r, 1               <i>; yields i8</i>
-  %Z = call void @foo() noreturn                    <i>; indicates that %foo never returns normally</i>
-  %ZZ = call zeroext i32 @bar()                     <i>; Return value is %zero extended</i>
-</pre>
-
-<p>llvm treats calls to some functions with names and arguments that match the
-standard C99 library as being the C99 library functions, and may perform
-optimizations or generate code for them under that assumption.  This is
-something we'd like to change in the future to provide better support for
-freestanding environments and non-C-based languages.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_va_arg">'<tt>va_arg</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;resultval&gt; = va_arg &lt;va_list*&gt; &lt;arglist&gt;, &lt;argty&gt;
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>va_arg</tt>' instruction is used to access arguments passed through
-   the "variable argument" area of a function call.  It is used to implement the
-   <tt>va_arg</tt> macro in C.</p>
-
-<h5>Arguments:</h5>
-<p>This instruction takes a <tt>va_list*</tt> value and the type of the
-   argument. It returns a value of the specified argument type and increments
-   the <tt>va_list</tt> to point to the next argument.  The actual type
-   of <tt>va_list</tt> is target specific.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>va_arg</tt>' instruction loads an argument of the specified type
-   from the specified <tt>va_list</tt> and causes the <tt>va_list</tt> to point
-   to the next argument.  For more information, see the variable argument
-   handling <a href="#int_varargs">Intrinsic Functions</a>.</p>
-
-<p>It is legal for this instruction to be called in a function which does not
-   take a variable number of arguments, for example, the <tt>vfprintf</tt>
-   function.</p>
-
-<p><tt>va_arg</tt> is an LLVM instruction instead of
-   an <a href="#intrinsics">intrinsic function</a> because it takes a type as an
-   argument.</p>
-
-<h5>Example:</h5>
-<p>See the <a href="#int_varargs">variable argument processing</a> section.</p>
-
-<p>Note that the code generator does not yet fully support va_arg on many
-   targets. Also, it does not currently support va_arg with aggregate types on
-   any target.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="i_landingpad">'<tt>landingpad</tt>' Instruction</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  &lt;resultval&gt; = landingpad &lt;resultty&gt; personality &lt;type&gt; &lt;pers_fn&gt; &lt;clause&gt;+
-  &lt;resultval&gt; = landingpad &lt;resultty&gt; personality &lt;type&gt; &lt;pers_fn&gt; cleanup &lt;clause&gt;*
-
-  &lt;clause&gt; := catch &lt;type&gt; &lt;value&gt;
-  &lt;clause&gt; := filter &lt;array constant type&gt; &lt;array constant&gt;
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>landingpad</tt>' instruction is used by
-   <a href="ExceptionHandling.html#overview">LLVM's exception handling
-   system</a> to specify that a basic block is a landing pad &mdash; one where
-   the exception lands, and corresponds to the code found in the
-   <i><tt>catch</tt></i> portion of a <i><tt>try/catch</tt></i> sequence. It
-   defines values supplied by the personality function (<tt>pers_fn</tt>) upon
-   re-entry to the function. The <tt>resultval</tt> has the
-   type <tt>resultty</tt>.</p>
-
-<h5>Arguments:</h5>
-<p>This instruction takes a <tt>pers_fn</tt> value. This is the personality
-   function associated with the unwinding mechanism. The optional
-   <tt>cleanup</tt> flag indicates that the landing pad block is a cleanup.</p>
-
-<p>A <tt>clause</tt> begins with the clause type &mdash; <tt>catch</tt>
-   or <tt>filter</tt> &mdash; and contains the global variable representing the
-   "type" that may be caught or filtered respectively. Unlike the
-   <tt>catch</tt> clause, the <tt>filter</tt> clause takes an array constant as
-   its argument. Use "<tt>[0 x i8**] undef</tt>" for a filter which cannot
-   throw. The '<tt>landingpad</tt>' instruction must contain <em>at least</em>
-   one <tt>clause</tt> or the <tt>cleanup</tt> flag.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>landingpad</tt>' instruction defines the values which are set by the
-   personality function (<tt>pers_fn</tt>) upon re-entry to the function, and
-   therefore the "result type" of the <tt>landingpad</tt> instruction. As with
-   calling conventions, how the personality function results are represented in
-   LLVM IR is target specific.</p>
-
-<p>The clauses are applied in order from top to bottom. If two
-   <tt>landingpad</tt> instructions are merged together through inlining, the
-   clauses from the calling function are appended to the list of clauses.
-   When the call stack is being unwound due to an exception being thrown, the
-   exception is compared against each <tt>clause</tt> in turn.  If it doesn't
-   match any of the clauses, and the <tt>cleanup</tt> flag is not set, then
-   unwinding continues further up the call stack.</p>
-
-<p>The <tt>landingpad</tt> instruction has several restrictions:</p>
-
-<ul>
-  <li>A landing pad block is a basic block which is the unwind destination of an
-      '<tt>invoke</tt>' instruction.</li>
-  <li>A landing pad block must have a '<tt>landingpad</tt>' instruction as its
-      first non-PHI instruction.</li>
-  <li>There can be only one '<tt>landingpad</tt>' instruction within the landing
-      pad block.</li>
-  <li>A basic block that is not a landing pad block may not include a
-      '<tt>landingpad</tt>' instruction.</li>
-  <li>All '<tt>landingpad</tt>' instructions in a function must have the same
-      personality function.</li>
-</ul>
-
-<h5>Example:</h5>
-<pre>
-  ;; A landing pad which can catch an integer.
-  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
-           catch i8** @_ZTIi
-  ;; A landing pad that is a cleanup.
-  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
-           cleanup
-  ;; A landing pad which can catch an integer and can only throw a double.
-  %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
-           catch i8** @_ZTIi
-           filter [1 x i8**] [@_ZTId]
-</pre>
-
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intrinsics">Intrinsic Functions</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM supports the notion of an "intrinsic function".  These functions have
-   well known names and semantics and are required to follow certain
-   restrictions.  Overall, these intrinsics represent an extension mechanism for
-   the LLVM language that does not require changing all of the transformations
-   in LLVM when adding to the language (or the bitcode reader/writer, the
-   parser, etc...).</p>
-
-<p>Intrinsic function names must all start with an "<tt>llvm.</tt>" prefix. This
-   prefix is reserved in LLVM for intrinsic names; thus, function names may not
-   begin with this prefix.  Intrinsic functions must always be external
-   functions: you cannot define the body of intrinsic functions.  Intrinsic
-   functions may only be used in call or invoke instructions: it is illegal to
-   take the address of an intrinsic function.  Additionally, because intrinsic
-   functions are part of the LLVM language, it is required if any are added that
-   they be documented here.</p>
-
-<p>Some intrinsic functions can be overloaded, i.e., the intrinsic represents a
-   family of functions that perform the same operation but on different data
-   types. Because LLVM can represent over 8 million different integer types,
-   overloading is used commonly to allow an intrinsic function to operate on any
-   integer type. One or more of the argument types or the result type can be
-   overloaded to accept any integer type. Argument types may also be defined as
-   exactly matching a previous argument's type or the result type. This allows
-   an intrinsic function which accepts multiple arguments, but needs all of them
-   to be of the same type, to only be overloaded with respect to a single
-   argument or the result.</p>
-
-<p>Overloaded intrinsics will have the names of its overloaded argument types
-   encoded into its function name, each preceded by a period. Only those types
-   which are overloaded result in a name suffix. Arguments whose type is matched
-   against another type do not. For example, the <tt>llvm.ctpop</tt> function
-   can take an integer of any width and returns an integer of exactly the same
-   integer width. This leads to a family of functions such as
-   <tt>i8 @llvm.ctpop.i8(i8 %val)</tt> and <tt>i29 @llvm.ctpop.i29(i29
-   %val)</tt>.  Only one type, the return type, is overloaded, and only one type
-   suffix is required. Because the argument's type is matched against the return
-   type, it does not require its own name suffix.</p>
-
-<p>To learn how to add an intrinsic function, please see the
-   <a href="ExtendingLLVM.html">Extending LLVM Guide</a>.</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_varargs">Variable Argument Handling Intrinsics</a>
-</h3>
-
-<div>
-
-<p>Variable argument support is defined in LLVM with
-   the <a href="#i_va_arg"><tt>va_arg</tt></a> instruction and these three
-   intrinsic functions.  These functions are related to the similarly named
-   macros defined in the <tt>&lt;stdarg.h&gt;</tt> header file.</p>
-
-<p>All of these functions operate on arguments that use a target-specific value
-   type "<tt>va_list</tt>".  The LLVM assembly language reference manual does
-   not define what this type is, so all transformations should be prepared to
-   handle these functions regardless of the type used.</p>
-
-<p>This example shows how the <a href="#i_va_arg"><tt>va_arg</tt></a>
-   instruction and the variable argument handling intrinsic functions are
-   used.</p>
-
-<pre class="doc_code">
-define i32 @test(i32 %X, ...) {
-  ; Initialize variable argument processing
-  %ap = alloca i8*
-  %ap2 = bitcast i8** %ap to i8*
-  call void @llvm.va_start(i8* %ap2)
-
-  ; Read a single integer argument
-  %tmp = va_arg i8** %ap, i32
-
-  ; Demonstrate usage of llvm.va_copy and llvm.va_end
-  %aq = alloca i8*
-  %aq2 = bitcast i8** %aq to i8*
-  call void @llvm.va_copy(i8* %aq2, i8* %ap2)
-  call void @llvm.va_end(i8* %aq2)
-
-  ; Stop processing of arguments.
-  call void @llvm.va_end(i8* %ap2)
-  ret i32 %tmp
-}
-
-declare void @llvm.va_start(i8*)
-declare void @llvm.va_copy(i8*, i8*)
-declare void @llvm.va_end(i8*)
-</pre>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_va_start">'<tt>llvm.va_start</tt>' Intrinsic</a>
-</h4>
-
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void %llvm.va_start(i8* &lt;arglist&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.va_start</tt>' intrinsic initializes <tt>*&lt;arglist&gt;</tt>
-   for subsequent use by <tt><a href="#i_va_arg">va_arg</a></tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The argument is a pointer to a <tt>va_list</tt> element to initialize.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.va_start</tt>' intrinsic works just like the <tt>va_start</tt>
-   macro available in C.  In a target-dependent way, it initializes
-   the <tt>va_list</tt> element to which the argument points, so that the next
-   call to <tt>va_arg</tt> will produce the first variable argument passed to
-   the function.  Unlike the C <tt>va_start</tt> macro, this intrinsic does not
-   need to know the last argument of the function as the compiler can figure
-   that out.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="int_va_end">'<tt>llvm.va_end</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.va_end(i8* &lt;arglist&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.va_end</tt>' intrinsic destroys <tt>*&lt;arglist&gt;</tt>,
-   which has been initialized previously
-   with <tt><a href="#int_va_start">llvm.va_start</a></tt>
-   or <tt><a href="#i_va_copy">llvm.va_copy</a></tt>.</p>
-
-<h5>Arguments:</h5>
-<p>The argument is a pointer to a <tt>va_list</tt> to destroy.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.va_end</tt>' intrinsic works just like the <tt>va_end</tt>
-   macro available in C.  In a target-dependent way, it destroys
-   the <tt>va_list</tt> element to which the argument points.  Calls
-   to <a href="#int_va_start"><tt>llvm.va_start</tt></a>
-   and <a href="#int_va_copy"> <tt>llvm.va_copy</tt></a> must be matched exactly
-   with calls to <tt>llvm.va_end</tt>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_va_copy">'<tt>llvm.va_copy</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.va_copy(i8* &lt;destarglist&gt;, i8* &lt;srcarglist&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.va_copy</tt>' intrinsic copies the current argument position
-   from the source argument list to the destination argument list.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is a pointer to a <tt>va_list</tt> element to initialize.
-   The second argument is a pointer to a <tt>va_list</tt> element to copy
-   from.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.va_copy</tt>' intrinsic works just like the <tt>va_copy</tt>
-   macro available in C.  In a target-dependent way, it copies the
-   source <tt>va_list</tt> element into the destination <tt>va_list</tt>
-   element.  This intrinsic is necessary because
-   the <tt><a href="#int_va_start"> llvm.va_start</a></tt> intrinsic may be
-   arbitrarily complex and require, for example, memory allocation.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_gc">Accurate Garbage Collection Intrinsics</a>
-</h3>
-
-<div>
-
-<p>LLVM support for <a href="GarbageCollection.html">Accurate Garbage
-Collection</a> (GC) requires the implementation and generation of these
-intrinsics. These intrinsics allow identification of <a href="#int_gcroot">GC
-roots on the stack</a>, as well as garbage collector implementations that
-require <a href="#int_gcread">read</a> and <a href="#int_gcwrite">write</a>
-barriers.  Front-ends for type-safe garbage collected languages should generate
-these intrinsics to make use of the LLVM garbage collectors.  For more details,
-see <a href="GarbageCollection.html">Accurate Garbage Collection with
-LLVM</a>.</p>
-
-<p>The garbage collection intrinsics only operate on objects in the generic
-   address space (address space zero).</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_gcroot">'<tt>llvm.gcroot</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.gcroot</tt>' intrinsic declares the existence of a GC root to
-   the code generator, and allows some metadata to be associated with it.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument specifies the address of a stack object that contains the
-   root pointer.  The second pointer (which must be either a constant or a
-   global value address) contains the meta-data to be associated with the
-   root.</p>
-
-<h5>Semantics:</h5>
-<p>At runtime, a call to this intrinsic stores a null pointer into the "ptrloc"
-   location.  At compile-time, the code generator generates information to allow
-   the runtime to find the pointer at GC safe points. The '<tt>llvm.gcroot</tt>'
-   intrinsic may only be used in a function which <a href="#gc">specifies a GC
-   algorithm</a>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_gcread">'<tt>llvm.gcread</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare i8* @llvm.gcread(i8* %ObjPtr, i8** %Ptr)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.gcread</tt>' intrinsic identifies reads of references from heap
-   locations, allowing garbage collector implementations that require read
-   barriers.</p>
-
-<h5>Arguments:</h5>
-<p>The second argument is the address to read from, which should be an address
-   allocated from the garbage collector.  The first object is a pointer to the
-   start of the referenced object, if needed by the language runtime (otherwise
-   null).</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.gcread</tt>' intrinsic has the same semantics as a load
-   instruction, but may be replaced with substantially more complex code by the
-   garbage collector runtime, as needed. The '<tt>llvm.gcread</tt>' intrinsic
-   may only be used in a function which <a href="#gc">specifies a GC
-   algorithm</a>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_gcwrite">'<tt>llvm.gcwrite</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.gcwrite(i8* %P1, i8* %Obj, i8** %P2)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.gcwrite</tt>' intrinsic identifies writes of references to heap
-   locations, allowing garbage collector implementations that require write
-   barriers (such as generational or reference counting collectors).</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is the reference to store, the second is the start of the
-   object to store it to, and the third is the address of the field of Obj to
-   store to.  If the runtime does not require a pointer to the object, Obj may
-   be null.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.gcwrite</tt>' intrinsic has the same semantics as a store
-   instruction, but may be replaced with substantially more complex code by the
-   garbage collector runtime, as needed. The '<tt>llvm.gcwrite</tt>' intrinsic
-   may only be used in a function which <a href="#gc">specifies a GC
-   algorithm</a>.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_codegen">Code Generator Intrinsics</a>
-</h3>
-
-<div>
-
-<p>These intrinsics are provided by LLVM to expose special features that may
-   only be implemented with code generator support.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_returnaddress">'<tt>llvm.returnaddress</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare i8  *@llvm.returnaddress(i32 &lt;level&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.returnaddress</tt>' intrinsic attempts to compute a
-   target-specific value indicating the return address of the current function
-   or one of its callers.</p>
-
-<h5>Arguments:</h5>
-<p>The argument to this intrinsic indicates which function to return the address
-   for.  Zero indicates the calling function, one indicates its caller, etc.
-   The argument is <b>required</b> to be a constant integer value.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.returnaddress</tt>' intrinsic either returns a pointer
-   indicating the return address of the specified call frame, or zero if it
-   cannot be identified.  The value returned by this intrinsic is likely to be
-   incorrect or 0 for arguments other than zero, so it should only be used for
-   debugging purposes.</p>
-
-<p>Note that calling this intrinsic does not prevent function inlining or other
-   aggressive transformations, so the value returned may not be that of the
-   obvious source-language caller.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_frameaddress">'<tt>llvm.frameaddress</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare i8* @llvm.frameaddress(i32 &lt;level&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.frameaddress</tt>' intrinsic attempts to return the
-   target-specific frame pointer value for the specified stack frame.</p>
-
-<h5>Arguments:</h5>
-<p>The argument to this intrinsic indicates which function to return the frame
-   pointer for.  Zero indicates the calling function, one indicates its caller,
-   etc.  The argument is <b>required</b> to be a constant integer value.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.frameaddress</tt>' intrinsic either returns a pointer
-   indicating the frame address of the specified call frame, or zero if it
-   cannot be identified.  The value returned by this intrinsic is likely to be
-   incorrect or 0 for arguments other than zero, so it should only be used for
-   debugging purposes.</p>
-
-<p>Note that calling this intrinsic does not prevent function inlining or other
-   aggressive transformations, so the value returned may not be that of the
-   obvious source-language caller.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_stacksave">'<tt>llvm.stacksave</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare i8* @llvm.stacksave()
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.stacksave</tt>' intrinsic is used to remember the current state
-   of the function stack, for use
-   with <a href="#int_stackrestore"> <tt>llvm.stackrestore</tt></a>.  This is
-   useful for implementing language features like scoped automatic variable
-   sized arrays in C99.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic returns a opaque pointer value that can be passed
-   to <a href="#int_stackrestore"><tt>llvm.stackrestore</tt></a>.  When
-   an <tt>llvm.stackrestore</tt> intrinsic is executed with a value saved
-   from <tt>llvm.stacksave</tt>, it effectively restores the state of the stack
-   to the state it was in when the <tt>llvm.stacksave</tt> intrinsic executed.
-   In practice, this pops any <a href="#i_alloca">alloca</a> blocks from the
-   stack that were allocated after the <tt>llvm.stacksave</tt> was executed.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_stackrestore">'<tt>llvm.stackrestore</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.stackrestore(i8* %ptr)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.stackrestore</tt>' intrinsic is used to restore the state of
-   the function stack to the state it was in when the
-   corresponding <a href="#int_stacksave"><tt>llvm.stacksave</tt></a> intrinsic
-   executed.  This is useful for implementing language features like scoped
-   automatic variable sized arrays in C99.</p>
-
-<h5>Semantics:</h5>
-<p>See the description
-   for <a href="#int_stacksave"><tt>llvm.stacksave</tt></a>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_prefetch">'<tt>llvm.prefetch</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.prefetch(i8* &lt;address&gt;, i32 &lt;rw&gt;, i32 &lt;locality&gt;, i32 &lt;cache type&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.prefetch</tt>' intrinsic is a hint to the code generator to
-   insert a prefetch instruction if supported; otherwise, it is a noop.
-   Prefetches have no effect on the behavior of the program but can change its
-   performance characteristics.</p>
-
-<h5>Arguments:</h5>
-<p><tt>address</tt> is the address to be prefetched, <tt>rw</tt> is the
-   specifier determining if the fetch should be for a read (0) or write (1),
-   and <tt>locality</tt> is a temporal locality specifier ranging from (0) - no
-   locality, to (3) - extremely local keep in cache. The <tt>cache type</tt>
-   specifies whether the prefetch is performed on the data (1) or instruction (0)
-   cache. The <tt>rw</tt>, <tt>locality</tt> and <tt>cache type</tt> arguments
-   must be constant integers.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic does not modify the behavior of the program.  In particular,
-   prefetches cannot trap and do not produce a value.  On targets that support
-   this intrinsic, the prefetch can provide hints to the processor cache for
-   better performance.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_pcmarker">'<tt>llvm.pcmarker</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.pcmarker(i32 &lt;id&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.pcmarker</tt>' intrinsic is a method to export a Program
-   Counter (PC) in a region of code to simulators and other tools.  The method
-   is target specific, but it is expected that the marker will use exported
-   symbols to transmit the PC of the marker.  The marker makes no guarantees
-   that it will remain with any specific instruction after optimizations.  It is
-   possible that the presence of a marker will inhibit optimizations.  The
-   intended use is to be inserted after optimizations to allow correlations of
-   simulation runs.</p>
-
-<h5>Arguments:</h5>
-<p><tt>id</tt> is a numerical id identifying the marker.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic does not modify the behavior of the program.  Backends that do
-   not support this intrinsic may ignore it.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_readcyclecounter">'<tt>llvm.readcyclecounter</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare i64 @llvm.readcyclecounter()
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.readcyclecounter</tt>' intrinsic provides access to the cycle
-   counter register (or similar low latency, high accuracy clocks) on those
-   targets that support it.  On X86, it should map to RDTSC.  On Alpha, it
-   should map to RPCC.  As the backing counters overflow quickly (on the order
-   of 9 seconds on alpha), this should only be used for small timings.</p>
-
-<h5>Semantics:</h5>
-<p>When directly supported, reading the cycle counter should not modify any
-   memory.  Implementations are allowed to either return a application specific
-   value or a system wide value.  On backends without support, this is lowered
-   to a constant 0.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_libc">Standard C Library Intrinsics</a>
-</h3>
-
-<div>
-
-<p>LLVM provides intrinsics for a few important standard C library functions.
-   These intrinsics allow source-language front-ends to pass information about
-   the alignment of the pointer arguments to the code generator, providing
-   opportunity for more efficient code generation.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_memcpy">'<tt>llvm.memcpy</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.memcpy</tt> on any
-   integer bit width and for different address spaces. Not all targets support
-   all bit widths however.</p>
-
-<pre>
-  declare void @llvm.memcpy.p0i8.p0i8.i32(i8* &lt;dest&gt;, i8* &lt;src&gt;,
-                                          i32 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
-  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* &lt;dest&gt;, i8* &lt;src&gt;,
-                                          i64 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.memcpy.*</tt>' intrinsics copy a block of memory from the
-   source location to the destination location.</p>
-
-<p>Note that, unlike the standard libc function, the <tt>llvm.memcpy.*</tt>
-   intrinsics do not return a value, takes extra alignment/isvolatile arguments
-   and the pointers can be in specified address spaces.</p>
-
-<h5>Arguments:</h5>
-
-<p>The first argument is a pointer to the destination, the second is a pointer
-   to the source.  The third argument is an integer argument specifying the
-   number of bytes to copy, the fourth argument is the alignment of the
-   source and destination locations, and the fifth is a boolean indicating a
-   volatile access.</p>
-
-<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
-   then the caller guarantees that both the source and destination pointers are
-   aligned to that boundary.</p>
-
-<p>If the <tt>isvolatile</tt> parameter is <tt>true</tt>, the
-   <tt>llvm.memcpy</tt> call is a <a href="#volatile">volatile operation</a>.
-   The detailed access behavior is not very cleanly specified and it is unwise
-   to depend on it.</p>
-
-<h5>Semantics:</h5>
-
-<p>The '<tt>llvm.memcpy.*</tt>' intrinsics copy a block of memory from the
-   source location to the destination location, which are not allowed to
-   overlap.  It copies "len" bytes of memory over.  If the argument is known to
-   be aligned to some boundary, this can be specified as the fourth argument,
-   otherwise it should be set to 0 or 1.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_memmove">'<tt>llvm.memmove</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use llvm.memmove on any integer bit
-   width and for different address space. Not all targets support all bit
-   widths however.</p>
-
-<pre>
-  declare void @llvm.memmove.p0i8.p0i8.i32(i8* &lt;dest&gt;, i8* &lt;src&gt;,
-                                           i32 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
-  declare void @llvm.memmove.p0i8.p0i8.i64(i8* &lt;dest&gt;, i8* &lt;src&gt;,
-                                           i64 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.memmove.*</tt>' intrinsics move a block of memory from the
-   source location to the destination location. It is similar to the
-   '<tt>llvm.memcpy</tt>' intrinsic but allows the two memory locations to
-   overlap.</p>
-
-<p>Note that, unlike the standard libc function, the <tt>llvm.memmove.*</tt>
-   intrinsics do not return a value, takes extra alignment/isvolatile arguments
-   and the pointers can be in specified address spaces.</p>
-
-<h5>Arguments:</h5>
-
-<p>The first argument is a pointer to the destination, the second is a pointer
-   to the source.  The third argument is an integer argument specifying the
-   number of bytes to copy, the fourth argument is the alignment of the
-   source and destination locations, and the fifth is a boolean indicating a
-   volatile access.</p>
-
-<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
-   then the caller guarantees that the source and destination pointers are
-   aligned to that boundary.</p>
-
-<p>If the <tt>isvolatile</tt> parameter is <tt>true</tt>, the
-   <tt>llvm.memmove</tt> call is a <a href="#volatile">volatile operation</a>.
-   The detailed access behavior is not very cleanly specified and it is unwise
-   to depend on it.</p>
-
-<h5>Semantics:</h5>
-
-<p>The '<tt>llvm.memmove.*</tt>' intrinsics copy a block of memory from the
-   source location to the destination location, which may overlap.  It copies
-   "len" bytes of memory over.  If the argument is known to be aligned to some
-   boundary, this can be specified as the fourth argument, otherwise it should
-   be set to 0 or 1.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_memset">'<tt>llvm.memset.*</tt>' Intrinsics</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use llvm.memset on any integer bit
-   width and for different address spaces. However, not all targets support all
-   bit widths.</p>
-
-<pre>
-  declare void @llvm.memset.p0i8.i32(i8* &lt;dest&gt;, i8 &lt;val&gt;,
-                                     i32 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
-  declare void @llvm.memset.p0i8.i64(i8* &lt;dest&gt;, i8 &lt;val&gt;,
-                                     i64 &lt;len&gt;, i32 &lt;align&gt;, i1 &lt;isvolatile&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.memset.*</tt>' intrinsics fill a block of memory with a
-   particular byte value.</p>
-
-<p>Note that, unlike the standard libc function, the <tt>llvm.memset</tt>
-   intrinsic does not return a value and takes extra alignment/volatile
-   arguments.  Also, the destination can be in an arbitrary address space.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is a pointer to the destination to fill, the second is the
-   byte value with which to fill it, the third argument is an integer argument
-   specifying the number of bytes to fill, and the fourth argument is the known
-   alignment of the destination location.</p>
-
-<p>If the call to this intrinsic has an alignment value that is not 0 or 1,
-   then the caller guarantees that the destination pointer is aligned to that
-   boundary.</p>
-
-<p>If the <tt>isvolatile</tt> parameter is <tt>true</tt>, the
-   <tt>llvm.memset</tt> call is a <a href="#volatile">volatile operation</a>.
-   The detailed access behavior is not very cleanly specified and it is unwise
-   to depend on it.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.memset.*</tt>' intrinsics fill "len" bytes of memory starting
-   at the destination location.  If the argument is known to be aligned to some
-   boundary, this can be specified as the fourth argument, otherwise it should
-   be set to 0 or 1.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_sqrt">'<tt>llvm.sqrt.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.sqrt</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.sqrt.f32(float %Val)
-  declare double    @llvm.sqrt.f64(double %Val)
-  declare x86_fp80  @llvm.sqrt.f80(x86_fp80 %Val)
-  declare fp128     @llvm.sqrt.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.sqrt.ppcf128(ppc_fp128 %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.sqrt</tt>' intrinsics return the sqrt of the specified operand,
-   returning the same value as the libm '<tt>sqrt</tt>' functions would.
-   Unlike <tt>sqrt</tt> in libm, however, <tt>llvm.sqrt</tt> has undefined
-   behavior for negative numbers other than -0.0 (which allows for better
-   optimization, because there is no need to worry about errno being
-   set).  <tt>llvm.sqrt(-0.0)</tt> is defined to return -0.0 like IEEE sqrt.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the sqrt of the specified operand if it is a
-   nonnegative floating point number.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_powi">'<tt>llvm.powi.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.powi</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.powi.f32(float  %Val, i32 %power)
-  declare double    @llvm.powi.f64(double %Val, i32 %power)
-  declare x86_fp80  @llvm.powi.f80(x86_fp80  %Val, i32 %power)
-  declare fp128     @llvm.powi.f128(fp128 %Val, i32 %power)
-  declare ppc_fp128 @llvm.powi.ppcf128(ppc_fp128  %Val, i32 %power)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.powi.*</tt>' intrinsics return the first operand raised to the
-   specified (positive or negative) power.  The order of evaluation of
-   multiplications is not defined.  When a vector of floating point type is
-   used, the second argument remains a scalar integer value.</p>
-
-<h5>Arguments:</h5>
-<p>The second argument is an integer power, and the first is a value to raise to
-   that power.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the first value raised to the second power with an
-   unspecified sequence of rounding operations.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_sin">'<tt>llvm.sin.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.sin</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.sin.f32(float  %Val)
-  declare double    @llvm.sin.f64(double %Val)
-  declare x86_fp80  @llvm.sin.f80(x86_fp80  %Val)
-  declare fp128     @llvm.sin.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.sin.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.sin.*</tt>' intrinsics return the sine of the operand.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the sine of the specified operand, returning the same
-   values as the libm <tt>sin</tt> functions would, and handles error conditions
-   in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_cos">'<tt>llvm.cos.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.cos</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.cos.f32(float  %Val)
-  declare double    @llvm.cos.f64(double %Val)
-  declare x86_fp80  @llvm.cos.f80(x86_fp80  %Val)
-  declare fp128     @llvm.cos.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.cos.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.cos.*</tt>' intrinsics return the cosine of the operand.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the cosine of the specified operand, returning the same
-   values as the libm <tt>cos</tt> functions would, and handles error conditions
-   in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_pow">'<tt>llvm.pow.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.pow</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.pow.f32(float  %Val, float %Power)
-  declare double    @llvm.pow.f64(double %Val, double %Power)
-  declare x86_fp80  @llvm.pow.f80(x86_fp80  %Val, x86_fp80 %Power)
-  declare fp128     @llvm.pow.f128(fp128 %Val, fp128 %Power)
-  declare ppc_fp128 @llvm.pow.ppcf128(ppc_fp128  %Val, ppc_fp128 Power)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.pow.*</tt>' intrinsics return the first operand raised to the
-   specified (positive or negative) power.</p>
-
-<h5>Arguments:</h5>
-<p>The second argument is a floating point power, and the first is a value to
-   raise to that power.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the first value raised to the second power, returning
-   the same values as the libm <tt>pow</tt> functions would, and handles error
-   conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_exp">'<tt>llvm.exp.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.exp</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.exp.f32(float  %Val)
-  declare double    @llvm.exp.f64(double %Val)
-  declare x86_fp80  @llvm.exp.f80(x86_fp80  %Val)
-  declare fp128     @llvm.exp.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.exp.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.exp.*</tt>' intrinsics perform the exp function.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>exp</tt> functions
-   would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_exp2">'<tt>llvm.exp2.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.exp2</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.exp2.f32(float  %Val)
-  declare double    @llvm.exp2.f64(double %Val)
-  declare x86_fp80  @llvm.exp2.f80(x86_fp80  %Val)
-  declare fp128     @llvm.exp2.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.exp2.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.exp2.*</tt>' intrinsics perform the exp2 function.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>exp2</tt> functions
-   would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_log">'<tt>llvm.log.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.log</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.log.f32(float  %Val)
-  declare double    @llvm.log.f64(double %Val)
-  declare x86_fp80  @llvm.log.f80(x86_fp80  %Val)
-  declare fp128     @llvm.log.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.log.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.log.*</tt>' intrinsics perform the log function.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>log</tt> functions
-   would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_log10">'<tt>llvm.log10.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.log10</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.log10.f32(float  %Val)
-  declare double    @llvm.log10.f64(double %Val)
-  declare x86_fp80  @llvm.log10.f80(x86_fp80  %Val)
-  declare fp128     @llvm.log10.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.log10.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.log10.*</tt>' intrinsics perform the log10 function.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>log10</tt> functions
-   would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_log2">'<tt>llvm.log2.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.log2</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.log2.f32(float  %Val)
-  declare double    @llvm.log2.f64(double %Val)
-  declare x86_fp80  @llvm.log2.f80(x86_fp80  %Val)
-  declare fp128     @llvm.log2.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.log2.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.log2.*</tt>' intrinsics perform the log2 function.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>log2</tt> functions
-   would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.fma</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.fma.f32(float  %a, float  %b, float  %c)
-  declare double    @llvm.fma.f64(double %a, double %b, double %c)
-  declare x86_fp80  @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
-  declare fp128     @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
-  declare ppc_fp128 @llvm.fma.ppcf128(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.fma.*</tt>' intrinsics perform the fused multiply-add
-   operation.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>fma</tt> functions
-   would.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_fabs">'<tt>llvm.fabs.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.fabs</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.fabs.f32(float  %Val)
-  declare double    @llvm.fabs.f64(double %Val)
-  declare x86_fp80  @llvm.fabs.f80(x86_fp80  %Val)
-  declare fp128     @llvm.fabs.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.fabs.*</tt>' intrinsics return the absolute value of
-   the operand.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>fabs</tt> functions
-   would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_floor">'<tt>llvm.floor.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.floor</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.floor.f32(float  %Val)
-  declare double    @llvm.floor.f64(double %Val)
-  declare x86_fp80  @llvm.floor.f80(x86_fp80  %Val)
-  declare fp128     @llvm.floor.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.floor.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.floor.*</tt>' intrinsics return the floor of
-   the operand.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>floor</tt> functions
-   would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_ceil">'<tt>llvm.ceil.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.ceil</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.ceil.f32(float  %Val)
-  declare double    @llvm.ceil.f64(double %Val)
-  declare x86_fp80  @llvm.ceil.f80(x86_fp80  %Val)
-  declare fp128     @llvm.ceil.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.ceil.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.ceil.*</tt>' intrinsics return the ceiling of
-   the operand.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>ceil</tt> functions
-   would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_trunc">'<tt>llvm.trunc.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.trunc</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.trunc.f32(float  %Val)
-  declare double    @llvm.trunc.f64(double %Val)
-  declare x86_fp80  @llvm.trunc.f80(x86_fp80  %Val)
-  declare fp128     @llvm.trunc.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.trunc.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.trunc.*</tt>' intrinsics returns the operand rounded to the
-   nearest integer not larger in magnitude than the operand.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>trunc</tt> functions
-   would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_rint">'<tt>llvm.rint.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.rint</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.rint.f32(float  %Val)
-  declare double    @llvm.rint.f64(double %Val)
-  declare x86_fp80  @llvm.rint.f80(x86_fp80  %Val)
-  declare fp128     @llvm.rint.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.rint.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.rint.*</tt>' intrinsics returns the operand rounded to the
-   nearest integer. It may raise an inexact floating-point exception if the
-   operand isn't an integer.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>rint</tt> functions
-   would, and handles error conditions in the same way.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_nearbyint">'<tt>llvm.nearbyint.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.nearbyint</tt> on any
-   floating point or vector of floating point type. Not all targets support all
-   types however.</p>
-
-<pre>
-  declare float     @llvm.nearbyint.f32(float  %Val)
-  declare double    @llvm.nearbyint.f64(double %Val)
-  declare x86_fp80  @llvm.nearbyint.f80(x86_fp80  %Val)
-  declare fp128     @llvm.nearbyint.f128(fp128 %Val)
-  declare ppc_fp128 @llvm.nearbyint.ppcf128(ppc_fp128  %Val)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.nearbyint.*</tt>' intrinsics returns the operand rounded to the
-   nearest integer.</p>
-
-<h5>Arguments:</h5>
-<p>The argument and return value are floating point numbers of the same
-   type.</p>
-
-<h5>Semantics:</h5>
-<p>This function returns the same values as the libm <tt>nearbyint</tt>
-   functions would, and handles error conditions in the same way.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_manip">Bit Manipulation Intrinsics</a>
-</h3>
-
-<div>
-
-<p>LLVM provides intrinsics for a few important bit manipulation operations.
-   These allow efficient code generation for some algorithms.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_bswap">'<tt>llvm.bswap.*</tt>' Intrinsics</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic function. You can use bswap on any integer
-   type that is an even number of bytes (i.e. BitWidth % 16 == 0).</p>
-
-<pre>
-  declare i16 @llvm.bswap.i16(i16 &lt;id&gt;)
-  declare i32 @llvm.bswap.i32(i32 &lt;id&gt;)
-  declare i64 @llvm.bswap.i64(i64 &lt;id&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.bswap</tt>' family of intrinsics is used to byte swap integer
-   values with an even number of bytes (positive multiple of 16 bits).  These
-   are useful for performing operations on data that is not in the target's
-   native byte order.</p>
-
-<h5>Semantics:</h5>
-<p>The <tt>llvm.bswap.i16</tt> intrinsic returns an i16 value that has the high
-   and low byte of the input i16 swapped.  Similarly,
-   the <tt>llvm.bswap.i32</tt> intrinsic returns an i32 value that has the four
-   bytes of the input i32 swapped, so that if the input bytes are numbered 0, 1,
-   2, 3 then the returned i32 will have its bytes in 3, 2, 1, 0 order.
-   The <tt>llvm.bswap.i48</tt>, <tt>llvm.bswap.i64</tt> and other intrinsics
-   extend this concept to additional even-byte lengths (6 bytes, 8 bytes and
-   more, respectively).</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_ctpop">'<tt>llvm.ctpop.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use llvm.ctpop on any integer bit
-   width, or on any vector with integer elements. Not all targets support all
-  bit widths or vector types, however.</p>
-
-<pre>
-  declare i8 @llvm.ctpop.i8(i8  &lt;src&gt;)
-  declare i16 @llvm.ctpop.i16(i16 &lt;src&gt;)
-  declare i32 @llvm.ctpop.i32(i32 &lt;src&gt;)
-  declare i64 @llvm.ctpop.i64(i64 &lt;src&gt;)
-  declare i256 @llvm.ctpop.i256(i256 &lt;src&gt;)
-  declare &lt;2 x i32&gt; @llvm.ctpop.v2i32(&lt;2 x i32&gt; &lt;src&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.ctpop</tt>' family of intrinsics counts the number of bits set
-   in a value.</p>
-
-<h5>Arguments:</h5>
-<p>The only argument is the value to be counted.  The argument may be of any
-   integer type, or a vector with integer elements.
-   The return type must match the argument type.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.ctpop</tt>' intrinsic counts the 1's in a variable, or within each
-   element of a vector.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_ctlz">'<tt>llvm.ctlz.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.ctlz</tt> on any
-   integer bit width, or any vector whose elements are integers. Not all
-   targets support all bit widths or vector types, however.</p>
-
-<pre>
-  declare i8   @llvm.ctlz.i8  (i8   &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-  declare i16  @llvm.ctlz.i16 (i16  &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-  declare i32  @llvm.ctlz.i32 (i32  &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-  declare i64  @llvm.ctlz.i64 (i64  &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-  declare i256 @llvm.ctlz.i256(i256 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-  declase &lt;2 x i32&gt; @llvm.ctlz.v2i32(&lt;2 x i32&gt; &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.ctlz</tt>' family of intrinsic functions counts the number of
-   leading zeros in a variable.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is the value to be counted. This argument may be of any
-   integer type, or a vectory with integer element type. The return type
-   must match the first argument type.</p>
-
-<p>The second argument must be a constant and is a flag to indicate whether the
-   intrinsic should ensure that a zero as the first argument produces a defined
-   result. Historically some architectures did not provide a defined result for
-   zero values as efficiently, and many algorithms are now predicated on
-   avoiding zero-value inputs.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.ctlz</tt>' intrinsic counts the leading (most significant)
-   zeros in a variable, or within each element of the vector.
-   If <tt>src == 0</tt> then the result is the size in bits of the type of
-   <tt>src</tt> if <tt>is_zero_undef == 0</tt> and <tt>undef</tt> otherwise.
-   For example, <tt>llvm.ctlz(i32 2) = 30</tt>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_cttz">'<tt>llvm.cttz.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.cttz</tt> on any
-   integer bit width, or any vector of integer elements. Not all targets
-   support all bit widths or vector types, however.</p>
-
-<pre>
-  declare i8   @llvm.cttz.i8  (i8   &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-  declare i16  @llvm.cttz.i16 (i16  &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-  declare i32  @llvm.cttz.i32 (i32  &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-  declare i64  @llvm.cttz.i64 (i64  &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-  declare i256 @llvm.cttz.i256(i256 &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-  declase &lt;2 x i32&gt; @llvm.cttz.v2i32(&lt;2 x i32&gt; &lt;src&gt;, i1 &lt;is_zero_undef&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.cttz</tt>' family of intrinsic functions counts the number of
-   trailing zeros.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is the value to be counted. This argument may be of any
-   integer type, or a vectory with integer element type. The return type
-   must match the first argument type.</p>
-
-<p>The second argument must be a constant and is a flag to indicate whether the
-   intrinsic should ensure that a zero as the first argument produces a defined
-   result. Historically some architectures did not provide a defined result for
-   zero values as efficiently, and many algorithms are now predicated on
-   avoiding zero-value inputs.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.cttz</tt>' intrinsic counts the trailing (least significant)
-   zeros in a variable, or within each element of a vector.
-   If <tt>src == 0</tt> then the result is the size in bits of the type of
-   <tt>src</tt> if <tt>is_zero_undef == 0</tt> and <tt>undef</tt> otherwise.
-   For example, <tt>llvm.cttz(2) = 1</tt>.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_overflow">Arithmetic with Overflow Intrinsics</a>
-</h3>
-
-<div>
-
-<p>LLVM provides intrinsics for some arithmetic with overflow operations.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_sadd_overflow">
-    '<tt>llvm.sadd.with.overflow.*</tt>' Intrinsics
-  </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.sadd.with.overflow</tt>
-   on any integer bit width.</p>
-
-<pre>
-  declare {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a, i16 %b)
-  declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
-  declare {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.sadd.with.overflow</tt>' family of intrinsic functions perform
-   a signed addition of the two arguments, and indicate whether an overflow
-   occurred during the signed summation.</p>
-
-<h5>Arguments:</h5>
-<p>The arguments (%a and %b) and the first element of the result structure may
-   be of integer types of any bit width, but they must have the same bit
-   width. The second element of the result structure must be of
-   type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
-   undergo signed addition.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.sadd.with.overflow</tt>' family of intrinsic functions perform
-   a signed addition of the two variables. They return a structure &mdash; the
-   first element of which is the signed summation, and the second element of
-   which is a bit specifying if the signed summation resulted in an
-   overflow.</p>
-
-<h5>Examples:</h5>
-<pre>
-  %res = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
-  %sum = extractvalue {i32, i1} %res, 0
-  %obit = extractvalue {i32, i1} %res, 1
-  br i1 %obit, label %overflow, label %normal
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_uadd_overflow">
-    '<tt>llvm.uadd.with.overflow.*</tt>' Intrinsics
-  </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.uadd.with.overflow</tt>
-   on any integer bit width.</p>
-
-<pre>
-  declare {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
-  declare {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
-  declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.uadd.with.overflow</tt>' family of intrinsic functions perform
-   an unsigned addition of the two arguments, and indicate whether a carry
-   occurred during the unsigned summation.</p>
-
-<h5>Arguments:</h5>
-<p>The arguments (%a and %b) and the first element of the result structure may
-   be of integer types of any bit width, but they must have the same bit
-   width. The second element of the result structure must be of
-   type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
-   undergo unsigned addition.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.uadd.with.overflow</tt>' family of intrinsic functions perform
-   an unsigned addition of the two arguments. They return a structure &mdash;
-   the first element of which is the sum, and the second element of which is a
-   bit specifying if the unsigned summation resulted in a carry.</p>
-
-<h5>Examples:</h5>
-<pre>
-  %res = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
-  %sum = extractvalue {i32, i1} %res, 0
-  %obit = extractvalue {i32, i1} %res, 1
-  br i1 %obit, label %carry, label %normal
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_ssub_overflow">
-    '<tt>llvm.ssub.with.overflow.*</tt>' Intrinsics
-  </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.ssub.with.overflow</tt>
-   on any integer bit width.</p>
-
-<pre>
-  declare {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
-  declare {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
-  declare {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.ssub.with.overflow</tt>' family of intrinsic functions perform
-   a signed subtraction of the two arguments, and indicate whether an overflow
-   occurred during the signed subtraction.</p>
-
-<h5>Arguments:</h5>
-<p>The arguments (%a and %b) and the first element of the result structure may
-   be of integer types of any bit width, but they must have the same bit
-   width. The second element of the result structure must be of
-   type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
-   undergo signed subtraction.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.ssub.with.overflow</tt>' family of intrinsic functions perform
-   a signed subtraction of the two arguments. They return a structure &mdash;
-   the first element of which is the subtraction, and the second element of
-   which is a bit specifying if the signed subtraction resulted in an
-   overflow.</p>
-
-<h5>Examples:</h5>
-<pre>
-  %res = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
-  %sum = extractvalue {i32, i1} %res, 0
-  %obit = extractvalue {i32, i1} %res, 1
-  br i1 %obit, label %overflow, label %normal
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_usub_overflow">
-    '<tt>llvm.usub.with.overflow.*</tt>' Intrinsics
-  </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.usub.with.overflow</tt>
-   on any integer bit width.</p>
-
-<pre>
-  declare {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
-  declare {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
-  declare {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.usub.with.overflow</tt>' family of intrinsic functions perform
-   an unsigned subtraction of the two arguments, and indicate whether an
-   overflow occurred during the unsigned subtraction.</p>
-
-<h5>Arguments:</h5>
-<p>The arguments (%a and %b) and the first element of the result structure may
-   be of integer types of any bit width, but they must have the same bit
-   width. The second element of the result structure must be of
-   type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
-   undergo unsigned subtraction.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.usub.with.overflow</tt>' family of intrinsic functions perform
-   an unsigned subtraction of the two arguments. They return a structure &mdash;
-   the first element of which is the subtraction, and the second element of
-   which is a bit specifying if the unsigned subtraction resulted in an
-   overflow.</p>
-
-<h5>Examples:</h5>
-<pre>
-  %res = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
-  %sum = extractvalue {i32, i1} %res, 0
-  %obit = extractvalue {i32, i1} %res, 1
-  br i1 %obit, label %overflow, label %normal
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_smul_overflow">
-    '<tt>llvm.smul.with.overflow.*</tt>' Intrinsics
-  </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.smul.with.overflow</tt>
-   on any integer bit width.</p>
-
-<pre>
-  declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
-  declare {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
-  declare {i64, i1} @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
-</pre>
-
-<h5>Overview:</h5>
-
-<p>The '<tt>llvm.smul.with.overflow</tt>' family of intrinsic functions perform
-   a signed multiplication of the two arguments, and indicate whether an
-   overflow occurred during the signed multiplication.</p>
-
-<h5>Arguments:</h5>
-<p>The arguments (%a and %b) and the first element of the result structure may
-   be of integer types of any bit width, but they must have the same bit
-   width. The second element of the result structure must be of
-   type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
-   undergo signed multiplication.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.smul.with.overflow</tt>' family of intrinsic functions perform
-   a signed multiplication of the two arguments. They return a structure &mdash;
-   the first element of which is the multiplication, and the second element of
-   which is a bit specifying if the signed multiplication resulted in an
-   overflow.</p>
-
-<h5>Examples:</h5>
-<pre>
-  %res = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
-  %sum = extractvalue {i32, i1} %res, 0
-  %obit = extractvalue {i32, i1} %res, 1
-  br i1 %obit, label %overflow, label %normal
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_umul_overflow">
-    '<tt>llvm.umul.with.overflow.*</tt>' Intrinsics
-  </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use <tt>llvm.umul.with.overflow</tt>
-   on any integer bit width.</p>
-
-<pre>
-  declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
-  declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
-  declare {i64, i1} @llvm.umul.with.overflow.i64(i64 %a, i64 %b)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.umul.with.overflow</tt>' family of intrinsic functions perform
-   a unsigned multiplication of the two arguments, and indicate whether an
-   overflow occurred during the unsigned multiplication.</p>
-
-<h5>Arguments:</h5>
-<p>The arguments (%a and %b) and the first element of the result structure may
-   be of integer types of any bit width, but they must have the same bit
-   width. The second element of the result structure must be of
-   type <tt>i1</tt>. <tt>%a</tt> and <tt>%b</tt> are the two values that will
-   undergo unsigned multiplication.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.umul.with.overflow</tt>' family of intrinsic functions perform
-   an unsigned multiplication of the two arguments. They return a structure
-   &mdash; the first element of which is the multiplication, and the second
-   element of which is a bit specifying if the unsigned multiplication resulted
-   in an overflow.</p>
-
-<h5>Examples:</h5>
-<pre>
-  %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
-  %sum = extractvalue {i32, i1} %res, 0
-  %obit = extractvalue {i32, i1} %res, 1
-  br i1 %obit, label %overflow, label %normal
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="spec_arithmetic">Specialised Arithmetic Intrinsics</a>
-</h3>
-
-<!-- _______________________________________________________________________ -->
-
-<h4>
-  <a name="fmuladd">'<tt>llvm.fmuladd.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
-  declare double @llvm.fmuladd.f64(double %a, double %b, double %c)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.fmuladd.*</tt>' intrinsic functions represent multiply-add
-expressions that can be fused if the code generator determines that the fused
-expression would be legal and efficient.</p>
-
-<h5>Arguments:</h5>
-<p>The '<tt>llvm.fmuladd.*</tt>' intrinsics each take three arguments: two
-multiplicands, a and b, and an addend c.</p>
-
-<h5>Semantics:</h5>
-<p>The expression:</p>
-<pre>
-  %0 = call float @llvm.fmuladd.f32(%a, %b, %c)
-</pre>
-<p>is equivalent to the expression a * b + c, except that rounding will not be
-performed between the multiplication and addition steps if the code generator
-fuses the operations. Fusion is not guaranteed, even if the target platform
-supports it. If a fused multiply-add is required the corresponding llvm.fma.*
-intrinsic function should be used instead.</p>
-
-<h5>Examples:</h5>
-<pre>
-  %r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields {float}:r2 = (a * b) + c
-</pre>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_fp16">Half Precision Floating Point Intrinsics</a>
-</h3>
-
-<div>
-
-<p>For most target platforms, half precision floating point is a storage-only
-   format. This means that it is
-   a dense encoding (in memory) but does not support computation in the
-   format.</p>
-
-<p>This means that code must first load the half-precision floating point
-   value as an i16, then convert it to float with <a
-   href="#int_convert_from_fp16"><tt>llvm.convert.from.fp16</tt></a>.
-   Computation can then be performed on the float value (including extending to
-   double etc).  To store the value back to memory, it is first converted to
-   float if needed, then converted to i16 with
-   <a href="#int_convert_to_fp16"><tt>llvm.convert.to.fp16</tt></a>, then
-   storing as an i16 value.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_convert_to_fp16">
-    '<tt>llvm.convert.to.fp16</tt>' Intrinsic
-  </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare i16 @llvm.convert.to.fp16(f32 %a)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.convert.to.fp16</tt>' intrinsic function performs
-   a conversion from single precision floating point format to half precision
-   floating point format.</p>
-
-<h5>Arguments:</h5>
-<p>The intrinsic function contains single argument - the value to be
-   converted.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.convert.to.fp16</tt>' intrinsic function performs
-   a conversion from single precision floating point format to half precision
-   floating point format. The return value is an <tt>i16</tt> which
-   contains the converted number.</p>
-
-<h5>Examples:</h5>
-<pre>
-  %res = call i16 @llvm.convert.to.fp16(f32 %a)
-  store i16 %res, i16* @x, align 2
-</pre>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_convert_from_fp16">
-    '<tt>llvm.convert.from.fp16</tt>' Intrinsic
-  </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare f32 @llvm.convert.from.fp16(i16 %a)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.convert.from.fp16</tt>' intrinsic function performs
-   a conversion from half precision floating point format to single precision
-   floating point format.</p>
-
-<h5>Arguments:</h5>
-<p>The intrinsic function contains single argument - the value to be
-   converted.</p>
-
-<h5>Semantics:</h5>
-<p>The '<tt>llvm.convert.from.fp16</tt>' intrinsic function performs a
-   conversion from half single precision floating point format to single
-   precision floating point format. The input half-float value is represented by
-   an <tt>i16</tt> value.</p>
-
-<h5>Examples:</h5>
-<pre>
-  %a = load i16* @x, align 2
-  %res = call f32 @llvm.convert.from.fp16(i16 %a)
-</pre>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_debugger">Debugger Intrinsics</a>
-</h3>
-
-<div>
-
-<p>The LLVM debugger intrinsics (which all start with <tt>llvm.dbg.</tt>
-   prefix), are described in
-   the <a href="SourceLevelDebugging.html#format_common_intrinsics">LLVM Source
-   Level Debugging</a> document.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_eh">Exception Handling Intrinsics</a>
-</h3>
-
-<div>
-
-<p>The LLVM exception handling intrinsics (which all start with
-   <tt>llvm.eh.</tt> prefix), are described in
-   the <a href="ExceptionHandling.html#format_common_intrinsics">LLVM Exception
-   Handling</a> document.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_trampoline">Trampoline Intrinsics</a>
-</h3>
-
-<div>
-
-<p>These intrinsics make it possible to excise one parameter, marked with
-   the <a href="#nest"><tt>nest</tt></a> attribute, from a function.
-   The result is a callable
-   function pointer lacking the nest parameter - the caller does not need to
-   provide a value for it.  Instead, the value to use is stored in advance in a
-   "trampoline", a block of memory usually allocated on the stack, which also
-   contains code to splice the nest value into the argument list.  This is used
-   to implement the GCC nested function address extension.</p>
-
-<p>For example, if the function is
-   <tt>i32 f(i8* nest %c, i32 %x, i32 %y)</tt> then the resulting function
-   pointer has signature <tt>i32 (i32, i32)*</tt>.  It can be created as
-   follows:</p>
-
-<pre class="doc_code">
-  %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
-  %tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0
-  call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval)
-  %p = call i8* @llvm.adjust.trampoline(i8* %tramp1)
-  %fp = bitcast i8* %p to i32 (i32, i32)*
-</pre>
-
-<p>The call <tt>%val = call i32 %fp(i32 %x, i32 %y)</tt> is then equivalent
-   to <tt>%val = call i32 %f(i8* %nval, i32 %x, i32 %y)</tt>.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_it">
-    '<tt>llvm.init.trampoline</tt>' Intrinsic
-  </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.init.trampoline(i8* &lt;tramp&gt;, i8* &lt;func&gt;, i8* &lt;nval&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>This fills the memory pointed to by <tt>tramp</tt> with executable code,
-   turning it into a trampoline.</p>
-
-<h5>Arguments:</h5>
-<p>The <tt>llvm.init.trampoline</tt> intrinsic takes three arguments, all
-   pointers.  The <tt>tramp</tt> argument must point to a sufficiently large and
-   sufficiently aligned block of memory; this memory is written to by the
-   intrinsic.  Note that the size and the alignment are target-specific - LLVM
-   currently provides no portable way of determining them, so a front-end that
-   generates this intrinsic needs to have some target-specific knowledge.
-   The <tt>func</tt> argument must hold a function bitcast to
-   an <tt>i8*</tt>.</p>
-
-<h5>Semantics:</h5>
-<p>The block of memory pointed to by <tt>tramp</tt> is filled with target
-   dependent code, turning it into a function.  Then <tt>tramp</tt> needs to be
-   passed to <a href="#int_at">llvm.adjust.trampoline</a> to get a pointer
-   which can be <a href="#int_trampoline">bitcast (to a new function) and
-   called</a>.  The new function's signature is the same as that of
-   <tt>func</tt> with any arguments marked with the <tt>nest</tt> attribute
-   removed.  At most one such <tt>nest</tt> argument is allowed, and it must be of
-   pointer type.  Calling the new function is equivalent to calling <tt>func</tt>
-   with the same argument list, but with <tt>nval</tt> used for the missing
-   <tt>nest</tt> argument.  If, after calling <tt>llvm.init.trampoline</tt>, the
-   memory pointed to by <tt>tramp</tt> is modified, then the effect of any later call
-   to the returned function pointer is undefined.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_at">
-    '<tt>llvm.adjust.trampoline</tt>' Intrinsic
-  </a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare i8* @llvm.adjust.trampoline(i8* &lt;tramp&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>This performs any required machine-specific adjustment to the address of a
-   trampoline (passed as <tt>tramp</tt>).</p>
-
-<h5>Arguments:</h5>
-<p><tt>tramp</tt> must point to a block of memory which already has trampoline code
-   filled in by a previous call to <a href="#int_it"><tt>llvm.init.trampoline</tt>
-   </a>.</p>
-
-<h5>Semantics:</h5>
-<p>On some architectures the address of the code to be executed needs to be
-   different to the address where the trampoline is actually stored.  This
-   intrinsic returns the executable address corresponding to <tt>tramp</tt>
-   after performing the required machine specific adjustments.
-   The pointer returned can then be <a href="#int_trampoline"> bitcast and
-   executed</a>.
-</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_memorymarkers">Memory Use Markers</a>
-</h3>
-
-<div>
-
-<p>This class of intrinsics exists to information about the lifetime of memory
-   objects and ranges where variables are immutable.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_lifetime_start">'<tt>llvm.lifetime.start</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.lifetime.start(i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.lifetime.start</tt>' intrinsic specifies the start of a memory
-   object's lifetime.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is a constant integer representing the size of the
-   object, or -1 if it is variable sized.  The second argument is a pointer to
-   the object.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic indicates that before this point in the code, the value of the
-   memory pointed to by <tt>ptr</tt> is dead.  This means that it is known to
-   never be used and has an undefined value.  A load from the pointer that
-   precedes this intrinsic can be replaced with
-   <tt>'<a href="#undefvalues">undef</a>'</tt>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_lifetime_end">'<tt>llvm.lifetime.end</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.lifetime.end(i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.lifetime.end</tt>' intrinsic specifies the end of a memory
-   object's lifetime.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is a constant integer representing the size of the
-   object, or -1 if it is variable sized.  The second argument is a pointer to
-   the object.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic indicates that after this point in the code, the value of the
-   memory pointed to by <tt>ptr</tt> is dead.  This means that it is known to
-   never be used and has an undefined value.  Any stores into the memory object
-   following this intrinsic may be removed as dead.
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_invariant_start">'<tt>llvm.invariant.start</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare {}* @llvm.invariant.start(i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.invariant.start</tt>' intrinsic specifies that the contents of
-   a memory object will not change.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is a constant integer representing the size of the
-   object, or -1 if it is variable sized.  The second argument is a pointer to
-   the object.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic indicates that until an <tt>llvm.invariant.end</tt> that uses
-   the return value, the referenced memory location is constant and
-   unchanging.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_invariant_end">'<tt>llvm.invariant.end</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.invariant.end({}* &lt;start&gt;, i64 &lt;size&gt;, i8* nocapture &lt;ptr&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.invariant.end</tt>' intrinsic specifies that the contents of
-   a memory object are mutable.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is the matching <tt>llvm.invariant.start</tt> intrinsic.
-   The second argument is a constant integer representing the size of the
-   object, or -1 if it is variable sized and the third argument is a pointer
-   to the object.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic indicates that the memory is mutable again.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="int_general">General Intrinsics</a>
-</h3>
-
-<div>
-
-<p>This class of intrinsics is designed to be generic and has no specific
-   purpose.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_var_annotation">'<tt>llvm.var.annotation</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.var.annotation(i8* &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32  &lt;int&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.var.annotation</tt>' intrinsic.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is a pointer to a value, the second is a pointer to a
-   global string, the third is a pointer to a global string which is the source
-   file name, and the last argument is the line number.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic allows annotation of local variables with arbitrary strings.
-   This can be useful for special purpose optimizations that want to look for
-   these annotations.  These have no other defined use; they are ignored by code
-   generation and optimization.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_annotation">'<tt>llvm.annotation.*</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<p>This is an overloaded intrinsic. You can use '<tt>llvm.annotation</tt>' on
-   any integer bit width.</p>
-
-<pre>
-  declare i8 @llvm.annotation.i8(i8 &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32  &lt;int&gt;)
-  declare i16 @llvm.annotation.i16(i16 &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32  &lt;int&gt;)
-  declare i32 @llvm.annotation.i32(i32 &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32  &lt;int&gt;)
-  declare i64 @llvm.annotation.i64(i64 &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32  &lt;int&gt;)
-  declare i256 @llvm.annotation.i256(i256 &lt;val&gt;, i8* &lt;str&gt;, i8* &lt;str&gt;, i32  &lt;int&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.annotation</tt>' intrinsic.</p>
-
-<h5>Arguments:</h5>
-<p>The first argument is an integer value (result of some expression), the
-   second is a pointer to a global string, the third is a pointer to a global
-   string which is the source file name, and the last argument is the line
-   number.  It returns the value of the first argument.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic allows annotations to be put on arbitrary expressions with
-   arbitrary strings.  This can be useful for special purpose optimizations that
-   want to look for these annotations.  These have no other defined use; they
-   are ignored by code generation and optimization.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_trap">'<tt>llvm.trap</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.trap() noreturn nounwind
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.trap</tt>' intrinsic.</p>
-
-<h5>Arguments:</h5>
-<p>None.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic is lowered to the target dependent trap instruction. If the
-   target does not have a trap instruction, this intrinsic will be lowered to
-   a call of the <tt>abort()</tt> function.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_debugtrap">'<tt>llvm.debugtrap</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.debugtrap() nounwind
-</pre>
-
-<h5>Overview:</h5>
-<p>The '<tt>llvm.debugtrap</tt>' intrinsic.</p>
-
-<h5>Arguments:</h5>
-<p>None.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic is lowered to code which is intended to cause an execution
-   trap with the intention of requesting the attention of a debugger.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_stackprotector">'<tt>llvm.stackprotector</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.stackprotector(i8* &lt;guard&gt;, i8** &lt;slot&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The <tt>llvm.stackprotector</tt> intrinsic takes the <tt>guard</tt> and
-   stores it onto the stack at <tt>slot</tt>. The stack slot is adjusted to
-   ensure that it is placed on the stack before local variables.</p>
-
-<h5>Arguments:</h5>
-<p>The <tt>llvm.stackprotector</tt> intrinsic requires two pointer
-   arguments. The first argument is the value loaded from the stack
-   guard <tt>@__stack_chk_guard</tt>. The second variable is an <tt>alloca</tt>
-   that has enough space to hold the value of the guard.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic causes the prologue/epilogue inserter to force the position of
-   the <tt>AllocaInst</tt> stack slot to be before local variables on the
-   stack. This is to ensure that if a local variable on the stack is
-   overwritten, it will destroy the value of the guard. When the function exits,
-   the guard on the stack is checked against the original guard. If they are
-   different, then the program aborts by calling the <tt>__stack_chk_fail()</tt>
-   function.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_objectsize">'<tt>llvm.objectsize</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare i32 @llvm.objectsize.i32(i8* &lt;object&gt;, i1 &lt;min&gt;)
-  declare i64 @llvm.objectsize.i64(i8* &lt;object&gt;, i1 &lt;min&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The <tt>llvm.objectsize</tt> intrinsic is designed to provide information to
-   the optimizers to determine at compile time whether a) an operation (like
-   memcpy) will overflow a buffer that corresponds to an object, or b) that a
-   runtime check for overflow isn't necessary. An object in this context means
-   an allocation of a specific class, structure, array, or other object.</p>
-
-<h5>Arguments:</h5>
-<p>The <tt>llvm.objectsize</tt> intrinsic takes two arguments. The first
-   argument is a pointer to or into the <tt>object</tt>. The second argument
-   is a boolean and determines whether <tt>llvm.objectsize</tt> returns 0 (if
-   true) or -1 (if false) when the object size is unknown.
-   The second argument only accepts constants.</p>
-
-<h5>Semantics:</h5>
-<p>The <tt>llvm.objectsize</tt> intrinsic is lowered to a constant representing
-   the size of the object concerned. If the size cannot be determined at compile
-   time, <tt>llvm.objectsize</tt> returns <tt>i32/i64 -1 or 0</tt>
-   (depending on the <tt>min</tt> argument).</p>
-
-</div>
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_expect">'<tt>llvm.expect</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare i32 @llvm.expect.i32(i32 &lt;val&gt;, i32 &lt;expected_val&gt;)
-  declare i64 @llvm.expect.i64(i64 &lt;val&gt;, i64 &lt;expected_val&gt;)
-</pre>
-
-<h5>Overview:</h5>
-<p>The <tt>llvm.expect</tt> intrinsic provides information about expected (the
-   most probable) value of <tt>val</tt>, which can be used by optimizers.</p>
-
-<h5>Arguments:</h5>
-<p>The <tt>llvm.expect</tt> intrinsic takes two arguments. The first
-   argument is a value. The second argument is an expected value, this needs to
-   be a constant value, variables are not allowed.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic is lowered to the <tt>val</tt>.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="int_donothing">'<tt>llvm.donothing</tt>' Intrinsic</a>
-</h4>
-
-<div>
-
-<h5>Syntax:</h5>
-<pre>
-  declare void @llvm.donothing() nounwind readnone
-</pre>
-
-<h5>Overview:</h5>
-<p>The <tt>llvm.donothing</tt> intrinsic doesn't perform any operation. It's the
-only intrinsic that can be called with an invoke instruction.</p>
-
-<h5>Arguments:</h5>
-<p>None.</p>
-
-<h5>Semantics:</h5>
-<p>This intrinsic does nothing, and it's removed by optimizers and ignored by
-codegen.</p>
-</div>
-
-</div>
-
-</div>
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-
-</body>
-</html>
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
new file mode 100644
index 0000000000..1ea475dee6
--- /dev/null
+++ b/docs/LangRef.rst
@@ -0,0 +1,8298 @@
+==============================
+LLVM Language Reference Manual
+==============================
+
+.. contents::
+   :local:
+   :depth: 3
+
+Abstract
+========
+
+This document is a reference manual for the LLVM assembly language. LLVM
+is a Static Single Assignment (SSA) based representation that provides
+type safety, low-level operations, flexibility, and the capability of
+representing 'all' high-level languages cleanly. It is the common code
+representation used throughout all phases of the LLVM compilation
+strategy.
+
+Introduction
+============
+
+The LLVM code representation is designed to be used in three different
+forms: as an in-memory compiler IR, as an on-disk bitcode representation
+(suitable for fast loading by a Just-In-Time compiler), and as a human
+readable assembly language representation. This allows LLVM to provide a
+powerful intermediate representation for efficient compiler
+transformations and analysis, while providing a natural means to debug
+and visualize the transformations. The three different forms of LLVM are
+all equivalent. This document describes the human readable
+representation and notation.
+
+The LLVM representation aims to be light-weight and low-level while
+being expressive, typed, and extensible at the same time. It aims to be
+a "universal IR" of sorts, by being at a low enough level that
+high-level ideas may be cleanly mapped to it (similar to how
+microprocessors are "universal IR's", allowing many source languages to
+be mapped to them). By providing type information, LLVM can be used as
+the target of optimizations: for example, through pointer analysis, it
+can be proven that a C automatic variable is never accessed outside of
+the current function, allowing it to be promoted to a simple SSA value
+instead of a memory location.
+
+.. _wellformed:
+
+Well-Formedness
+---------------
+
+It is important to note that this document describes 'well formed' LLVM
+assembly language. There is a difference between what the parser accepts
+and what is considered 'well formed'. For example, the following
+instruction is syntactically okay, but not well formed:
+
+.. code-block:: llvm
+
+    %x = add i32 1, %x
+
+because the definition of ``%x`` does not dominate all of its uses. The
+LLVM infrastructure provides a verification pass that may be used to
+verify that an LLVM module is well formed. This pass is automatically
+run by the parser after parsing input assembly and by the optimizer
+before it outputs bitcode. The violations pointed out by the verifier
+pass indicate bugs in transformation passes or input to the parser.
+
+.. _identifiers:
+
+Identifiers
+===========
+
+LLVM identifiers come in two basic types: global and local. Global
+identifiers (functions, global variables) begin with the ``'@'``
+character. Local identifiers (register names, types) begin with the
+``'%'`` character. Additionally, there are three different formats for
+identifiers, for different purposes:
+
+#. Named values are represented as a string of characters with their
+   prefix. For example, ``%foo``, ``@DivisionByZero``,
+   ``%a.really.long.identifier``. The actual regular expression used is
+   '``[%@][a-zA-Z$._][a-zA-Z$._0-9]*``'. Identifiers which require other
+   characters in their names can be surrounded with quotes. Special
+   characters may be escaped using ``"\xx"`` where ``xx`` is the ASCII
+   code for the character in hexadecimal. In this way, any character can
+   be used in a name value, even quotes themselves.
+#. Unnamed values are represented as an unsigned numeric value with
+   their prefix. For example, ``%12``, ``@2``, ``%44``.
+#. Constants, which are described in the section  Constants_ below.
+
+LLVM requires that values start with a prefix for two reasons: Compilers
+don't need to worry about name clashes with reserved words, and the set
+of reserved words may be expanded in the future without penalty.
+Additionally, unnamed identifiers allow a compiler to quickly come up
+with a temporary variable without having to avoid symbol table
+conflicts.
+
+Reserved words in LLVM are very similar to reserved words in other
+languages. There are keywords for different opcodes ('``add``',
+'``bitcast``', '``ret``', etc...), for primitive type names ('``void``',
+'``i32``', etc...), and others. These reserved words cannot conflict
+with variable names, because none of them start with a prefix character
+(``'%'`` or ``'@'``).
+
+Here is an example of LLVM code to multiply the integer variable
+'``%X``' by 8:
+
+The easy way:
+
+.. code-block:: llvm
+
+    %result = mul i32 %X, 8
+
+After strength reduction:
+
+.. code-block:: llvm
+
+    %result = shl i32 %X, i8 3
+
+And the hard way:
+
+.. code-block:: llvm
+
+    %0 = add i32 %X, %X           ; yields {i32}:%0
+    %1 = add i32 %0, %0           ; yields {i32}:%1
+    %result = add i32 %1, %1
+
+This last way of multiplying ``%X`` by 8 illustrates several important
+lexical features of LLVM:
+
+#. Comments are delimited with a '``;``' and go until the end of line.
+#. Unnamed temporaries are created when the result of a computation is
+   not assigned to a named value.
+#. Unnamed temporaries are numbered sequentially
+
+It also shows a convention that we follow in this document. When
+demonstrating instructions, we will follow an instruction with a comment
+that defines the type and name of value produced.
+
+High Level Structure
+====================
+
+Module Structure
+----------------
+
+LLVM programs are composed of ``Module``'s, each of which is a
+translation unit of the input programs. Each module consists of
+functions, global variables, and symbol table entries. Modules may be
+combined together with the LLVM linker, which merges function (and
+global variable) definitions, resolves forward declarations, and merges
+symbol table entries. Here is an example of the "hello world" module:
+
+.. code-block:: llvm
+
+    ; Declare the string constant as a global constant. 
+    @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00" 
+
+    ; External declaration of the puts function 
+    declare i32 @puts(i8* nocapture) nounwind 
+
+    ; Definition of main function
+    define i32 @main() {   ; i32()*  
+      ; Convert [13 x i8]* to i8  *... 
+      %cast210 = getelementptr [13 x i8]* @.str, i64 0, i64 0
+
+      ; Call puts function to write out the string to stdout. 
+      call i32 @puts(i8* %cast210)
+      ret i32 0 
+    }
+
+    ; Named metadata
+    !1 = metadata !{i32 42}
+    !foo = !{!1, null}
+
+This example is made up of a :ref:`global variable <globalvars>` named
+"``.str``", an external declaration of the "``puts``" function, a
+:ref:`function definition <functionstructure>` for "``main``" and
+:ref:`named metadata <namedmetadatastructure>` "``foo``".
+
+In general, a module is made up of a list of global values (where both
+functions and global variables are global values). Global values are
+represented by a pointer to a memory location (in this case, a pointer
+to an array of char, and a pointer to a function), and have one of the
+following :ref:`linkage types <linkage>`.
+
+.. _linkage:
+
+Linkage Types
+-------------
+
+All Global Variables and Functions have one of the following types of
+linkage:
+
+``private``
+    Global values with "``private``" linkage are only directly
+    accessible by objects in the current module. In particular, linking
+    code into a module with an private global value may cause the
+    private to be renamed as necessary to avoid collisions. Because the
+    symbol is private to the module, all references can be updated. This
+    doesn't show up in any symbol table in the object file.
+``linker_private``
+    Similar to ``private``, but the symbol is passed through the
+    assembler and evaluated by the linker. Unlike normal strong symbols,
+    they are removed by the linker from the final linked image
+    (executable or dynamic library).
+``linker_private_weak``
+    Similar to "``linker_private``", but the symbol is weak. Note that
+    ``linker_private_weak`` symbols are subject to coalescing by the
+    linker. The symbols are removed by the linker from the final linked
+    image (executable or dynamic library).
+``internal``
+    Similar to private, but the value shows as a local symbol
+    (``STB_LOCAL`` in the case of ELF) in the object file. This
+    corresponds to the notion of the '``static``' keyword in C.
+``available_externally``
+    Globals with "``available_externally``" linkage are never emitted
+    into the object file corresponding to the LLVM module. They exist to
+    allow inlining and other optimizations to take place given knowledge
+    of the definition of the global, which is known to be somewhere
+    outside the module. Globals with ``available_externally`` linkage
+    are allowed to be discarded at will, and are otherwise the same as
+    ``linkonce_odr``. This linkage type is only allowed on definitions,
+    not declarations.
+``linkonce``
+    Globals with "``linkonce``" linkage are merged with other globals of
+    the same name when linkage occurs. This can be used to implement
+    some forms of inline functions, templates, or other code which must
+    be generated in each translation unit that uses it, but where the
+    body may be overridden with a more definitive definition later.
+    Unreferenced ``linkonce`` globals are allowed to be discarded. Note
+    that ``linkonce`` linkage does not actually allow the optimizer to
+    inline the body of this function into callers because it doesn't
+    know if this definition of the function is the definitive definition
+    within the program or whether it will be overridden by a stronger
+    definition. To enable inlining and other optimizations, use
+    "``linkonce_odr``" linkage.
+``weak``
+    "``weak``" linkage has the same merging semantics as ``linkonce``
+    linkage, except that unreferenced globals with ``weak`` linkage may
+    not be discarded. This is used for globals that are declared "weak"
+    in C source code.
+``common``
+    "``common``" linkage is most similar to "``weak``" linkage, but they
+    are used for tentative definitions in C, such as "``int X;``" at
+    global scope. Symbols with "``common``" linkage are merged in the
+    same way as ``weak symbols``, and they may not be deleted if
+    unreferenced. ``common`` symbols may not have an explicit section,
+    must have a zero initializer, and may not be marked
+    ':ref:`constant <globalvars>`'. Functions and aliases may not have
+    common linkage.
+
+.. _linkage_appending:
+
+``appending``
+    "``appending``" linkage may only be applied to global variables of
+    pointer to array type. When two global variables with appending
+    linkage are linked together, the two global arrays are appended
+    together. This is the LLVM, typesafe, equivalent of having the
+    system linker append together "sections" with identical names when
+    .o files are linked.
+``extern_weak``
+    The semantics of this linkage follow the ELF object file model: the
+    symbol is weak until linked, if not linked, the symbol becomes null
+    instead of being an undefined reference.
+``linkonce_odr``, ``weak_odr``
+    Some languages allow differing globals to be merged, such as two
+    functions with different semantics. Other languages, such as
+    ``C++``, ensure that only equivalent globals are ever merged (the
+    "one definition rule" — "ODR"). Such languages can use the
+    ``linkonce_odr`` and ``weak_odr`` linkage types to indicate that the
+    global will only be merged with equivalent globals. These linkage
+    types are otherwise the same as their non-``odr`` versions.
+``linkonce_odr_auto_hide``
+    Similar to "``linkonce_odr``", but nothing in the translation unit
+    takes the address of this definition. For instance, functions that
+    had an inline definition, but the compiler decided not to inline it.
+    ``linkonce_odr_auto_hide`` may have only ``default`` visibility. The
+    symbols are removed by the linker from the final linked image
+    (executable or dynamic library).
+``external``
+    If none of the above identifiers are used, the global is externally
+    visible, meaning that it participates in linkage and can be used to
+    resolve external symbol references.
+
+The next two types of linkage are targeted for Microsoft Windows
+platform only. They are designed to support importing (exporting)
+symbols from (to) DLLs (Dynamic Link Libraries).
+
+``dllimport``
+    "``dllimport``" linkage causes the compiler to reference a function
+    or variable via a global pointer to a pointer that is set up by the
+    DLL exporting the symbol. On Microsoft Windows targets, the pointer
+    name is formed by combining ``__imp_`` and the function or variable
+    name.
+``dllexport``
+    "``dllexport``" linkage causes the compiler to provide a global
+    pointer to a pointer in a DLL, so that it can be referenced with the
+    ``dllimport`` attribute. On Microsoft Windows targets, the pointer
+    name is formed by combining ``__imp_`` and the function or variable
+    name.
+
+For example, since the "``.LC0``" variable is defined to be internal, if
+another module defined a "``.LC0``" variable and was linked with this
+one, one of the two would be renamed, preventing a collision. Since
+"``main``" and "``puts``" are external (i.e., lacking any linkage
+declarations), they are accessible outside of the current module.
+
+It is illegal for a function *declaration* to have any linkage type
+other than ``external``, ``dllimport`` or ``extern_weak``.
+
+Aliases can have only ``external``, ``internal``, ``weak`` or
+``weak_odr`` linkages.
+
+.. _callingconv:
+
+Calling Conventions
+-------------------
+
+LLVM :ref:`functions <functionstructure>`, :ref:`calls <i_call>` and
+:ref:`invokes <i_invoke>` can all have an optional calling convention
+specified for the call. The calling convention of any pair of dynamic
+caller/callee must match, or the behavior of the program is undefined.
+The following calling conventions are supported by LLVM, and more may be
+added in the future:
+
+"``ccc``" - The C calling convention
+    This calling convention (the default if no other calling convention
+    is specified) matches the target C calling conventions. This calling
+    convention supports varargs function calls and tolerates some
+    mismatch in the declared prototype and implemented declaration of
+    the function (as does normal C).
+"``fastcc``" - The fast calling convention
+    This calling convention attempts to make calls as fast as possible
+    (e.g. by passing things in registers). This calling convention
+    allows the target to use whatever tricks it wants to produce fast
+    code for the target, without having to conform to an externally
+    specified ABI (Application Binary Interface). `Tail calls can only
+    be optimized when this, the GHC or the HiPE convention is
+    used. <CodeGenerator.html#id80>`_ This calling convention does not
+    support varargs and requires the prototype of all callees to exactly
+    match the prototype of the function definition.
+"``coldcc``" - The cold calling convention
+    This calling convention attempts to make code in the caller as
+    efficient as possible under the assumption that the call is not
+    commonly executed. As such, these calls often preserve all registers
+    so that the call does not break any live ranges in the caller side.
+    This calling convention does not support varargs and requires the
+    prototype of all callees to exactly match the prototype of the
+    function definition.
+"``cc 10``" - GHC convention
+    This calling convention has been implemented specifically for use by
+    the `Glasgow Haskell Compiler (GHC) <http://www.haskell.org/ghc>`_.
+    It passes everything in registers, going to extremes to achieve this
+    by disabling callee save registers. This calling convention should
+    not be used lightly but only for specific situations such as an
+    alternative to the *register pinning* performance technique often
+    used when implementing functional programming languages. At the
+    moment only X86 supports this convention and it has the following
+    limitations:
+
+    -  On *X86-32* only supports up to 4 bit type parameters. No
+       floating point types are supported.
+    -  On *X86-64* only supports up to 10 bit type parameters and 6
+       floating point parameters.
+
+    This calling convention supports `tail call
+    optimization <CodeGenerator.html#id80>`_ but requires both the
+    caller and callee are using it.
+"``cc 11``" - The HiPE calling convention
+    This calling convention has been implemented specifically for use by
+    the `High-Performance Erlang
+    (HiPE) <http://www.it.uu.se/research/group/hipe/>`_ compiler, *the*
+    native code compiler of the `Ericsson's Open Source Erlang/OTP
+    system <http://www.erlang.org/download.shtml>`_. It uses more
+    registers for argument passing than the ordinary C calling
+    convention and defines no callee-saved registers. The calling
+    convention properly supports `tail call
+    optimization <CodeGenerator.html#id80>`_ but requires that both the
+    caller and the callee use it. It uses a *register pinning*
+    mechanism, similar to GHC's convention, for keeping frequently
+    accessed runtime components pinned to specific hardware registers.
+    At the moment only X86 supports this convention (both 32 and 64
+    bit).
+"``cc <n>``" - Numbered convention
+    Any calling convention may be specified by number, allowing
+    target-specific calling conventions to be used. Target specific
+    calling conventions start at 64.
+
+More calling conventions can be added/defined on an as-needed basis, to
+support Pascal conventions or any other well-known target-independent
+convention.
+
+Visibility Styles
+-----------------
+
+All Global Variables and Functions have one of the following visibility
+styles:
+
+"``default``" - Default style
+    On targets that use the ELF object file format, default visibility
+    means that the declaration is visible to other modules and, in
+    shared libraries, means that the declared entity may be overridden.
+    On Darwin, default visibility means that the declaration is visible
+    to other modules. Default visibility corresponds to "external
+    linkage" in the language.
+"``hidden``" - Hidden style
+    Two declarations of an object with hidden visibility refer to the
+    same object if they are in the same shared object. Usually, hidden
+    visibility indicates that the symbol will not be placed into the
+    dynamic symbol table, so no other module (executable or shared
+    library) can reference it directly.
+"``protected``" - Protected style
+    On ELF, protected visibility indicates that the symbol will be
+    placed in the dynamic symbol table, but that references within the
+    defining module will bind to the local symbol. That is, the symbol
+    cannot be overridden by another module.
+
+Named Types
+-----------
+
+LLVM IR allows you to specify name aliases for certain types. This can
+make it easier to read the IR and make the IR more condensed
+(particularly when recursive types are involved). An example of a name
+specification is:
+
+.. code-block:: llvm
+
+    %mytype = type { %mytype*, i32 }
+
+You may give a name to any :ref:`type <typesystem>` except
+":ref:`void <t_void>`". Type name aliases may be used anywhere a type is
+expected with the syntax "%mytype".
+
+Note that type names are aliases for the structural type that they
+indicate, and that you can therefore specify multiple names for the same
+type. This often leads to confusing behavior when dumping out a .ll
+file. Since LLVM IR uses structural typing, the name is not part of the
+type. When printing out LLVM IR, the printer will pick *one name* to
+render all types of a particular shape. This means that if you have code
+where two different source types end up having the same LLVM type, that
+the dumper will sometimes print the "wrong" or unexpected type. This is
+an important design point and isn't going to change.
+
+.. _globalvars:
+
+Global Variables
+----------------
+
+Global variables define regions of memory allocated at compilation time
+instead of run-time. Global variables may optionally be initialized, may
+have an explicit section to be placed in, and may have an optional
+explicit alignment specified.
+
+A variable may be defined as ``thread_local``, which means that it will
+not be shared by threads (each thread will have a separated copy of the
+variable). Not all targets support thread-local variables. Optionally, a
+TLS model may be specified:
+
+``localdynamic``
+    For variables that are only used within the current shared library.
+``initialexec``
+    For variables in modules that will not be loaded dynamically.
+``localexec``
+    For variables defined in the executable and only used within it.
+
+The models correspond to the ELF TLS models; see `ELF Handling For
+Thread-Local Storage <http://people.redhat.com/drepper/tls.pdf>`_ for
+more information on under which circumstances the different models may
+be used. The target may choose a different TLS model if the specified
+model is not supported, or if a better choice of model can be made.
+
+A variable may be defined as a global "constant," which indicates that
+the contents of the variable will **never** be modified (enabling better
+optimization, allowing the global data to be placed in the read-only
+section of an executable, etc). Note that variables that need runtime
+initialization cannot be marked "constant" as there is a store to the
+variable.
+
+LLVM explicitly allows *declarations* of global variables to be marked
+constant, even if the final definition of the global is not. This
+capability can be used to enable slightly better optimization of the
+program, but requires the language definition to guarantee that
+optimizations based on the 'constantness' are valid for the translation
+units that do not include the definition.
+
+As SSA values, global variables define pointer values that are in scope
+(i.e. they dominate) all basic blocks in the program. Global variables
+always define a pointer to their "content" type because they describe a
+region of memory, and all memory objects in LLVM are accessed through
+pointers.
+
+Global variables can be marked with ``unnamed_addr`` which indicates
+that the address is not significant, only the content. Constants marked
+like this can be merged with other constants if they have the same
+initializer. Note that a constant with significant address *can* be
+merged with a ``unnamed_addr`` constant, the result being a constant
+whose address is significant.
+
+A global variable may be declared to reside in a target-specific
+numbered address space. For targets that support them, address spaces
+may affect how optimizations are performed and/or what target
+instructions are used to access the variable. The default address space
+is zero. The address space qualifier must precede any other attributes.
+
+LLVM allows an explicit section to be specified for globals. If the
+target supports it, it will emit globals to the section specified.
+
+An explicit alignment may be specified for a global, which must be a
+power of 2. If not present, or if the alignment is set to zero, the
+alignment of the global is set by the target to whatever it feels
+convenient. If an explicit alignment is specified, the global is forced
+to have exactly that alignment. Targets and optimizers are not allowed
+to over-align the global if the global has an assigned section. In this
+case, the extra alignment could be observable: for example, code could
+assume that the globals are densely packed in their section and try to
+iterate over them as an array, alignment padding would break this
+iteration.
+
+For example, the following defines a global in a numbered address space
+with an initializer, section, and alignment:
+
+.. code-block:: llvm
+
+    @G = addrspace(5) constant float 1.0, section "foo", align 4
+
+The following example defines a thread-local global with the
+``initialexec`` TLS model:
+
+.. code-block:: llvm
+
+    @G = thread_local(initialexec) global i32 0, align 4
+
+.. _functionstructure:
+
+Functions
+---------
+
+LLVM function definitions consist of the "``define``" keyword, an
+optional :ref:`linkage type <linkage>`, an optional :ref:`visibility
+style <visibility>`, an optional :ref:`calling convention <callingconv>`,
+an optional ``unnamed_addr`` attribute, a return type, an optional
+:ref:`parameter attribute <paramattrs>` for the return type, a function
+name, a (possibly empty) argument list (each with optional :ref:`parameter
+attributes <paramattrs>`), optional :ref:`function attributes <fnattrs>`,
+an optional section, an optional alignment, an optional :ref:`garbage
+collector name <gc>`, an opening curly brace, a list of basic blocks,
+and a closing curly brace.
+
+LLVM function declarations consist of the "``declare``" keyword, an
+optional :ref:`linkage type <linkage>`, an optional :ref:`visibility
+style <visibility>`, an optional :ref:`calling convention <callingconv>`,
+an optional ``unnamed_addr`` attribute, a return type, an optional
+:ref:`parameter attribute <paramattrs>` for the return type, a function
+name, a possibly empty list of arguments, an optional alignment, and an
+optional :ref:`garbage collector name <gc>`.
+
+A function definition contains a list of basic blocks, forming the CFG
+(Control Flow Graph) for the function. Each basic block may optionally
+start with a label (giving the basic block a symbol table entry),
+contains a list of instructions, and ends with a
+:ref:`terminator <terminators>` instruction (such as a branch or function
+return).
+
+The first basic block in a function is special in two ways: it is
+immediately executed on entrance to the function, and it is not allowed
+to have predecessor basic blocks (i.e. there can not be any branches to
+the entry block of a function). Because the block can have no
+predecessors, it also cannot have any :ref:`PHI nodes <i_phi>`.
+
+LLVM allows an explicit section to be specified for functions. If the
+target supports it, it will emit functions to the section specified.
+
+An explicit alignment may be specified for a function. If not present,
+or if the alignment is set to zero, the alignment of the function is set
+by the target to whatever it feels convenient. If an explicit alignment
+is specified, the function is forced to have at least that much
+alignment. All alignments must be a power of 2.
+
+If the ``unnamed_addr`` attribute is given, the address is know to not
+be significant and two identical functions can be merged.
+
+Syntax::
+
+    define [linkage] [visibility]
+           [cconv] [ret attrs]
+           <ResultType> @<FunctionName> ([argument list])
+           [fn Attrs] [section "name"] [align N]
+           [gc] { ... }
+
+Aliases
+-------
+
+Aliases act as "second name" for the aliasee value (which can be either
+function, global variable, another alias or bitcast of global value).
+Aliases may have an optional :ref:`linkage type <linkage>`, and an optional
+:ref:`visibility style <visibility>`.
+
+Syntax::
+
+    @<Name> = alias [Linkage] [Visibility] <AliaseeTy> @<Aliasee>
+
+.. _namedmetadatastructure:
+
+Named Metadata
+--------------
+
+Named metadata is a collection of metadata. :ref:`Metadata
+nodes <metadata>` (but not metadata strings) are the only valid
+operands for a named metadata.
+
+Syntax::
+
+    ; Some unnamed metadata nodes, which are referenced by the named metadata.
+    !0 = metadata !{metadata !"zero"}
+    !1 = metadata !{metadata !"one"}
+    !2 = metadata !{metadata !"two"}
+    ; A named metadata.
+    !name = !{!0, !1, !2}
+
+.. _paramattrs:
+
+Parameter Attributes
+--------------------
+
+The return type and each parameter of a function type may have a set of
+*parameter attributes* associated with them. Parameter attributes are
+used to communicate additional information about the result or
+parameters of a function. Parameter attributes are considered to be part
+of the function, not of the function type, so functions with different
+parameter attributes can have the same function type.
+
+Parameter attributes are simple keywords that follow the type specified.
+If multiple parameter attributes are needed, they are space separated.
+For example:
+
+.. code-block:: llvm
+
+    declare i32 @printf(i8* noalias nocapture, ...)
+    declare i32 @atoi(i8 zeroext)
+    declare signext i8 @returns_signed_char()
+
+Note that any attributes for the function result (``nounwind``,
+``readonly``) come immediately after the argument list.
+
+Currently, only the following parameter attributes are defined:
+
+``zeroext``
+    This indicates to the code generator that the parameter or return
+    value should be zero-extended to the extent required by the target's
+    ABI (which is usually 32-bits, but is 8-bits for a i1 on x86-64) by
+    the caller (for a parameter) or the callee (for a return value).
+``signext``
+    This indicates to the code generator that the parameter or return
+    value should be sign-extended to the extent required by the target's
+    ABI (which is usually 32-bits) by the caller (for a parameter) or
+    the callee (for a return value).
+``inreg``
+    This indicates that this parameter or return value should be treated
+    in a special target-dependent fashion during while emitting code for
+    a function call or return (usually, by putting it in a register as
+    opposed to memory, though some targets use it to distinguish between
+    two different kinds of registers). Use of this attribute is
+    target-specific.
+``byval``
+    This indicates that the pointer parameter should really be passed by
+    value to the function. The attribute implies that a hidden copy of
+    the pointee is made between the caller and the callee, so the callee
+    is unable to modify the value in the caller. This attribute is only
+    valid on LLVM pointer arguments. It is generally used to pass
+    structs and arrays by value, but is also valid on pointers to
+    scalars. The copy is considered to belong to the caller not the
+    callee (for example, ``readonly`` functions should not write to
+    ``byval`` parameters). This is not a valid attribute for return
+    values.
+
+    The byval attribute also supports specifying an alignment with the
+    align attribute. It indicates the alignment of the stack slot to
+    form and the known alignment of the pointer specified to the call
+    site. If the alignment is not specified, then the code generator
+    makes a target-specific assumption.
+
+``sret``
+    This indicates that the pointer parameter specifies the address of a
+    structure that is the return value of the function in the source
+    program. This pointer must be guaranteed by the caller to be valid:
+    loads and stores to the structure may be assumed by the callee to
+    not to trap and to be properly aligned. This may only be applied to
+    the first parameter. This is not a valid attribute for return
+    values.
+``noalias``
+    This indicates that pointer values `*based* <pointeraliasing>` on
+    the argument or return value do not alias pointer values which are
+    not *based* on it, ignoring certain "irrelevant" dependencies. For a
+    call to the parent function, dependencies between memory references
+    from before or after the call and from those during the call are
+    "irrelevant" to the ``noalias`` keyword for the arguments and return
+    value used in that call. The caller shares the responsibility with
+    the callee for ensuring that these requirements are met. For further
+    details, please see the discussion of the NoAlias response in `alias
+    analysis <AliasAnalysis.html#MustMayNo>`_.
+
+    Note that this definition of ``noalias`` is intentionally similar
+    to the definition of ``restrict`` in C99 for function arguments,
+    though it is slightly weaker.
+
+    For function return values, C99's ``restrict`` is not meaningful,
+    while LLVM's ``noalias`` is.
+``nocapture``
+    This indicates that the callee does not make any copies of the
+    pointer that outlive the callee itself. This is not a valid
+    attribute for return values.
+
+.. _nest:
+
+``nest``
+    This indicates that the pointer parameter can be excised using the
+    :ref:`trampoline intrinsics <int_trampoline>`. This is not a valid
+    attribute for return values.
+
+.. _gc:
+
+Garbage Collector Names
+-----------------------
+
+Each function may specify a garbage collector name, which is simply a
+string:
+
+.. code-block:: llvm
+
+    define void @f() gc "name" { ... }
+
+The compiler declares the supported values of *name*. Specifying a
+collector which will cause the compiler to alter its output in order to
+support the named garbage collection algorithm.
+
+.. _fnattrs:
+
+Function Attributes
+-------------------
+
+Function attributes are set to communicate additional information about
+a function. Function attributes are considered to be part of the
+function, not of the function type, so functions with different function
+attributes can have the same function type.
+
+Function attributes are simple keywords that follow the type specified.
+If multiple attributes are needed, they are space separated. For
+example:
+
+.. code-block:: llvm
+
+    define void @f() noinline { ... }
+    define void @f() alwaysinline { ... }
+    define void @f() alwaysinline optsize { ... }
+    define void @f() optsize { ... }
+
+``address_safety``
+    This attribute indicates that the address safety analysis is enabled
+    for this function.
+``alignstack(<n>)``
+    This attribute indicates that, when emitting the prologue and
+    epilogue, the backend should forcibly align the stack pointer.
+    Specify the desired alignment, which must be a power of two, in
+    parentheses.
+``alwaysinline``
+    This attribute indicates that the inliner should attempt to inline
+    this function into callers whenever possible, ignoring any active
+    inlining size threshold for this caller.
+``nonlazybind``
+    This attribute suppresses lazy symbol binding for the function. This
+    may make calls to the function faster, at the cost of extra program
+    startup time if the function is not called during program startup.
+``inlinehint``
+    This attribute indicates that the source code contained a hint that
+    inlining this function is desirable (such as the "inline" keyword in
+    C/C++). It is just a hint; it imposes no requirements on the
+    inliner.
+``naked``
+    This attribute disables prologue / epilogue emission for the
+    function. This can have very system-specific consequences.
+``noimplicitfloat``
+    This attributes disables implicit floating point instructions.
+``noinline``
+    This attribute indicates that the inliner should never inline this
+    function in any situation. This attribute may not be used together
+    with the ``alwaysinline`` attribute.
+``noredzone``
+    This attribute indicates that the code generator should not use a
+    red zone, even if the target-specific ABI normally permits it.
+``noreturn``
+    This function attribute indicates that the function never returns
+    normally. This produces undefined behavior at runtime if the
+    function ever does dynamically return.
+``nounwind``
+    This function attribute indicates that the function never returns
+    with an unwind or exceptional control flow. If the function does
+    unwind, its runtime behavior is undefined.
+``optsize``
+    This attribute suggests that optimization passes and code generator
+    passes make choices that keep the code size of this function low,
+    and otherwise do optimizations specifically to reduce code size.
+``readnone``
+    This attribute indicates that the function computes its result (or
+    decides to unwind an exception) based strictly on its arguments,
+    without dereferencing any pointer arguments or otherwise accessing
+    any mutable state (e.g. memory, control registers, etc) visible to
+    caller functions. It does not write through any pointer arguments
+    (including ``byval`` arguments) and never changes any state visible
+    to callers. This means that it cannot unwind exceptions by calling
+    the ``C++`` exception throwing methods.
+``readonly``
+    This attribute indicates that the function does not write through
+    any pointer arguments (including ``byval`` arguments) or otherwise
+    modify any state (e.g. memory, control registers, etc) visible to
+    caller functions. It may dereference pointer arguments and read
+    state that may be set in the caller. A readonly function always
+    returns the same value (or unwinds an exception identically) when
+    called with the same set of arguments and global state. It cannot
+    unwind an exception by calling the ``C++`` exception throwing
+    methods.
+``returns_twice``
+    This attribute indicates that this function can return twice. The C
+    ``setjmp`` is an example of such a function. The compiler disables
+    some optimizations (like tail calls) in the caller of these
+    functions.
+``ssp``
+    This attribute indicates that the function should emit a stack
+    smashing protector. It is in the form of a "canary"—a random value
+    placed on the stack before the local variables that's checked upon
+    return from the function to see if it has been overwritten. A
+    heuristic is used to determine if a function needs stack protectors
+    or not.
+
+    If a function that has an ``ssp`` attribute is inlined into a
+    function that doesn't have an ``ssp`` attribute, then the resulting
+    function will have an ``ssp`` attribute.
+``sspreq``
+    This attribute indicates that the function should *always* emit a
+    stack smashing protector. This overrides the ``ssp`` function
+    attribute.
+
+    If a function that has an ``sspreq`` attribute is inlined into a
+    function that doesn't have an ``sspreq`` attribute or which has an
+    ``ssp`` attribute, then the resulting function will have an
+    ``sspreq`` attribute.
+``uwtable``
+    This attribute indicates that the ABI being targeted requires that
+    an unwind table entry be produce for this function even if we can
+    show that no exceptions passes by it. This is normally the case for
+    the ELF x86-64 abi, but it can be disabled for some compilation
+    units.
+
+.. _moduleasm:
+
+Module-Level Inline Assembly
+----------------------------
+
+Modules may contain "module-level inline asm" blocks, which corresponds
+to the GCC "file scope inline asm" blocks. These blocks are internally
+concatenated by LLVM and treated as a single unit, but may be separated
+in the ``.ll`` file if desired. The syntax is very simple:
+
+.. code-block:: llvm
+
+    module asm "inline asm code goes here"
+    module asm "more can go here"
+
+The strings can contain any character by escaping non-printable
+characters. The escape sequence used is simply "\\xx" where "xx" is the
+two digit hex code for the number.
+
+The inline asm code is simply printed to the machine code .s file when
+assembly code is generated.
+
+Data Layout
+-----------
+
+A module may specify a target specific data layout string that specifies
+how data is to be laid out in memory. The syntax for the data layout is
+simply:
+
+.. code-block:: llvm
+
+    target datalayout = "layout specification"
+
+The *layout specification* consists of a list of specifications
+separated by the minus sign character ('-'). Each specification starts
+with a letter and may include other information after the letter to
+define some aspect of the data layout. The specifications accepted are
+as follows:
+
+``E``
+    Specifies that the target lays out data in big-endian form. That is,
+    the bits with the most significance have the lowest address
+    location.
+``e``
+    Specifies that the target lays out data in little-endian form. That
+    is, the bits with the least significance have the lowest address
+    location.
+``S<size>``
+    Specifies the natural alignment of the stack in bits. Alignment
+    promotion of stack variables is limited to the natural stack
+    alignment to avoid dynamic stack realignment. The stack alignment
+    must be a multiple of 8-bits. If omitted, the natural stack
+    alignment defaults to "unspecified", which does not prevent any
+    alignment promotions.
+``p[n]:<size>:<abi>:<pref>``
+    This specifies the *size* of a pointer and its ``<abi>`` and
+    ``<pref>``\erred alignments for address space ``n``. All sizes are in
+    bits. Specifying the ``<pref>`` alignment is optional. If omitted, the
+    preceding ``:`` should be omitted too. The address space, ``n`` is
+    optional, and if not specified, denotes the default address space 0.
+    The value of ``n`` must be in the range [1,2^23).
+``i<size>:<abi>:<pref>``
+    This specifies the alignment for an integer type of a given bit
+    ``<size>``. The value of ``<size>`` must be in the range [1,2^23).
+``v<size>:<abi>:<pref>``
+    This specifies the alignment for a vector type of a given bit
+    ``<size>``.
+``f<size>:<abi>:<pref>``
+    This specifies the alignment for a floating point type of a given bit
+    ``<size>``. Only values of ``<size>`` that are supported by the target
+    will work. 32 (float) and 64 (double) are supported on all targets; 80
+    or 128 (different flavors of long double) are also supported on some
+    targets.
+``a<size>:<abi>:<pref>``
+    This specifies the alignment for an aggregate type of a given bit
+    ``<size>``.
+``s<size>:<abi>:<pref>``
+    This specifies the alignment for a stack object of a given bit
+    ``<size>``.
+``n<size1>:<size2>:<size3>...``
+    This specifies a set of native integer widths for the target CPU in
+    bits. For example, it might contain ``n32`` for 32-bit PowerPC,
+    ``n32:64`` for PowerPC 64, or ``n8:16:32:64`` for X86-64. Elements of
+    this set are considered to support most general arithmetic operations
+    efficiently.
+
+When constructing the data layout for a given target, LLVM starts with a
+default set of specifications which are then (possibly) overridden by
+the specifications in the ``datalayout`` keyword. The default
+specifications are given in this list:
+
+-  ``E`` - big endian
+-  ``p:64:64:64`` - 64-bit pointers with 64-bit alignment
+-  ``p1:32:32:32`` - 32-bit pointers with 32-bit alignment for address
+   space 1
+-  ``p2:16:32:32`` - 16-bit pointers with 32-bit alignment for address
+   space 2
+-  ``i1:8:8`` - i1 is 8-bit (byte) aligned
+-  ``i8:8:8`` - i8 is 8-bit (byte) aligned
+-  ``i16:16:16`` - i16 is 16-bit aligned
+-  ``i32:32:32`` - i32 is 32-bit aligned
+-  ``i64:32:64`` - i64 has ABI alignment of 32-bits but preferred
+   alignment of 64-bits
+-  ``f32:32:32`` - float is 32-bit aligned
+-  ``f64:64:64`` - double is 64-bit aligned
+-  ``v64:64:64`` - 64-bit vector is 64-bit aligned
+-  ``v128:128:128`` - 128-bit vector is 128-bit aligned
+-  ``a0:0:1`` - aggregates are 8-bit aligned
+-  ``s0:64:64`` - stack objects are 64-bit aligned
+
+When LLVM is determining the alignment for a given type, it uses the
+following rules:
+
+#. If the type sought is an exact match for one of the specifications,
+   that specification is used.
+#. If no match is found, and the type sought is an integer type, then
+   the smallest integer type that is larger than the bitwidth of the
+   sought type is used. If none of the specifications are larger than
+   the bitwidth then the largest integer type is used. For example,
+   given the default specifications above, the i7 type will use the
+   alignment of i8 (next largest) while both i65 and i256 will use the
+   alignment of i64 (largest specified).
+#. If no match is found, and the type sought is a vector type, then the
+   largest vector type that is smaller than the sought vector type will
+   be used as a fall back. This happens because <128 x double> can be
+   implemented in terms of 64 <2 x double>, for example.
+
+The function of the data layout string may not be what you expect.
+Notably, this is not a specification from the frontend of what alignment
+the code generator should use.
+
+Instead, if specified, the target data layout is required to match what
+the ultimate *code generator* expects. This string is used by the
+mid-level optimizers to improve code, and this only works if it matches
+what the ultimate code generator uses. If you would like to generate IR
+that does not embed this target-specific detail into the IR, then you
+don't have to specify the string. This will disable some optimizations
+that require precise layout information, but this also prevents those
+optimizations from introducing target specificity into the IR.
+
+.. _pointeraliasing:
+
+Pointer Aliasing Rules
+----------------------
+
+Any memory access must be done through a pointer value associated with
+an address range of the memory access, otherwise the behavior is
+undefined. Pointer values are associated with address ranges according
+to the following rules:
+
+-  A pointer value is associated with the addresses associated with any
+   value it is *based* on.
+-  An address of a global variable is associated with the address range
+   of the variable's storage.
+-  The result value of an allocation instruction is associated with the
+   address range of the allocated storage.
+-  A null pointer in the default address-space is associated with no
+   address.
+-  An integer constant other than zero or a pointer value returned from
+   a function not defined within LLVM may be associated with address
+   ranges allocated through mechanisms other than those provided by
+   LLVM. Such ranges shall not overlap with any ranges of addresses
+   allocated by mechanisms provided by LLVM.
+
+A pointer value is *based* on another pointer value according to the
+following rules:
+
+-  A pointer value formed from a ``getelementptr`` operation is *based*
+   on the first operand of the ``getelementptr``.
+-  The result value of a ``bitcast`` is *based* on the operand of the
+   ``bitcast``.
+-  A pointer value formed by an ``inttoptr`` is *based* on all pointer
+   values that contribute (directly or indirectly) to the computation of
+   the pointer's value.
+-  The "*based* on" relationship is transitive.
+
+Note that this definition of *"based"* is intentionally similar to the
+definition of *"based"* in C99, though it is slightly weaker.
+
+LLVM IR does not associate types with memory. The result type of a
+``load`` merely indicates the size and alignment of the memory from
+which to load, as well as the interpretation of the value. The first
+operand type of a ``store`` similarly only indicates the size and
+alignment of the store.
+
+Consequently, type-based alias analysis, aka TBAA, aka
+``-fstrict-aliasing``, is not applicable to general unadorned LLVM IR.
+:ref:`Metadata <metadata>` may be used to encode additional information
+which specialized optimization passes may use to implement type-based
+alias analysis.
+
+.. _volatile:
+
+Volatile Memory Accesses
+------------------------
+
+Certain memory accesses, such as :ref:`load <i_load>`'s,
+:ref:`store <i_store>`'s, and :ref:`llvm.memcpy <int_memcpy>`'s may be
+marked ``volatile``. The optimizers must not change the number of
+volatile operations or change their order of execution relative to other
+volatile operations. The optimizers *may* change the order of volatile
+operations relative to non-volatile operations. This is not Java's
+"volatile" and has no cross-thread synchronization behavior.
+
+.. _memmodel:
+
+Memory Model for Concurrent Operations
+--------------------------------------
+
+The LLVM IR does not define any way to start parallel threads of
+execution or to register signal handlers. Nonetheless, there are
+platform-specific ways to create them, and we define LLVM IR's behavior
+in their presence. This model is inspired by the C++0x memory model.
+
+For a more informal introduction to this model, see the :doc:`Atomics`.
+
+We define a *happens-before* partial order as the least partial order
+that
+
+-  Is a superset of single-thread program order, and
+-  When a *synchronizes-with* ``b``, includes an edge from ``a`` to
+   ``b``. *Synchronizes-with* pairs are introduced by platform-specific
+   techniques, like pthread locks, thread creation, thread joining,
+   etc., and by atomic instructions. (See also :ref:`Atomic Memory Ordering
+   Constraints <ordering>`).
+
+Note that program order does not introduce *happens-before* edges
+between a thread and signals executing inside that thread.
+
+Every (defined) read operation (load instructions, memcpy, atomic
+loads/read-modify-writes, etc.) R reads a series of bytes written by
+(defined) write operations (store instructions, atomic
+stores/read-modify-writes, memcpy, etc.). For the purposes of this
+section, initialized globals are considered to have a write of the
+initializer which is atomic and happens before any other read or write
+of the memory in question. For each byte of a read R, R\ :sub:`byte`
+may see any write to the same byte, except:
+
+-  If write\ :sub:`1`  happens before write\ :sub:`2`, and
+   write\ :sub:`2` happens before R\ :sub:`byte`, then
+   R\ :sub:`byte` does not see write\ :sub:`1`.
+-  If R\ :sub:`byte` happens before write\ :sub:`3`, then
+   R\ :sub:`byte` does not see write\ :sub:`3`.
+
+Given that definition, R\ :sub:`byte` is defined as follows:
+
+-  If R is volatile, the result is target-dependent. (Volatile is
+   supposed to give guarantees which can support ``sig_atomic_t`` in
+   C/C++, and may be used for accesses to addresses which do not behave
+   like normal memory. It does not generally provide cross-thread
+   synchronization.)
+-  Otherwise, if there is no write to the same byte that happens before
+   R\ :sub:`byte`, R\ :sub:`byte` returns ``undef`` for that byte.
+-  Otherwise, if R\ :sub:`byte` may see exactly one write,
+   R\ :sub:`byte` returns the value written by that write.
+-  Otherwise, if R is atomic, and all the writes R\ :sub:`byte` may
+   see are atomic, it chooses one of the values written. See the :ref:`Atomic
+   Memory Ordering Constraints <ordering>` section for additional
+   constraints on how the choice is made.
+-  Otherwise R\ :sub:`byte` returns ``undef``.
+
+R returns the value composed of the series of bytes it read. This
+implies that some bytes within the value may be ``undef`` **without**
+the entire value being ``undef``. Note that this only defines the
+semantics of the operation; it doesn't mean that targets will emit more
+than one instruction to read the series of bytes.
+
+Note that in cases where none of the atomic intrinsics are used, this
+model places only one restriction on IR transformations on top of what
+is required for single-threaded execution: introducing a store to a byte
+which might not otherwise be stored is not allowed in general.
+(Specifically, in the case where another thread might write to and read
+from an address, introducing a store can change a load that may see
+exactly one write into a load that may see multiple writes.)
+
+.. _ordering:
+
+Atomic Memory Ordering Constraints
+----------------------------------
+
+Atomic instructions (:ref:`cmpxchg <i_cmpxchg>`,
+:ref:`atomicrmw <i_atomicrmw>`, :ref:`fence <i_fence>`,
+:ref:`atomic load <i_load>`, and :ref:`atomic store <i_store>`) take
+an ordering parameter that determines which other atomic instructions on
+the same address they *synchronize with*. These semantics are borrowed
+from Java and C++0x, but are somewhat more colloquial. If these
+descriptions aren't precise enough, check those specs (see spec
+references in the :doc:`atomics guide <Atomics>`).
+:ref:`fence <i_fence>` instructions treat these orderings somewhat
+differently since they don't take an address. See that instruction's
+documentation for details.
+
+For a simpler introduction to the ordering constraints, see the
+:doc:`Atomics`.
+
+``unordered``
+    The set of values that can be read is governed by the happens-before
+    partial order. A value cannot be read unless some operation wrote
+    it. This is intended to provide a guarantee strong enough to model
+    Java's non-volatile shared variables. This ordering cannot be
+    specified for read-modify-write operations; it is not strong enough
+    to make them atomic in any interesting way.
+``monotonic``
+    In addition to the guarantees of ``unordered``, there is a single
+    total order for modifications by ``monotonic`` operations on each
+    address. All modification orders must be compatible with the
+    happens-before order. There is no guarantee that the modification
+    orders can be combined to a global total order for the whole program
+    (and this often will not be possible). The read in an atomic
+    read-modify-write operation (:ref:`cmpxchg <i_cmpxchg>` and
+    :ref:`atomicrmw <i_atomicrmw>`) reads the value in the modification
+    order immediately before the value it writes. If one atomic read
+    happens before another atomic read of the same address, the later
+    read must see the same value or a later value in the address's
+    modification order. This disallows reordering of ``monotonic`` (or
+    stronger) operations on the same address. If an address is written
+    ``monotonic``-ally by one thread, and other threads ``monotonic``-ally
+    read that address repeatedly, the other threads must eventually see
+    the write. This corresponds to the C++0x/C1x
+    ``memory_order_relaxed``.
+``acquire``
+    In addition to the guarantees of ``monotonic``, a
+    *synchronizes-with* edge may be formed with a ``release`` operation.
+    This is intended to model C++'s ``memory_order_acquire``.
+``release``
+    In addition to the guarantees of ``monotonic``, if this operation
+    writes a value which is subsequently read by an ``acquire``
+    operation, it *synchronizes-with* that operation. (This isn't a
+    complete description; see the C++0x definition of a release
+    sequence.) This corresponds to the C++0x/C1x
+    ``memory_order_release``.
+``acq_rel`` (acquire+release)
+    Acts as both an ``acquire`` and ``release`` operation on its
+    address. This corresponds to the C++0x/C1x ``memory_order_acq_rel``.
+``seq_cst`` (sequentially consistent)
+    In addition to the guarantees of ``acq_rel`` (``acquire`` for an
+    operation which only reads, ``release`` for an operation which only
+    writes), there is a global total order on all
+    sequentially-consistent operations on all addresses, which is
+    consistent with the *happens-before* partial order and with the
+    modification orders of all the affected addresses. Each
+    sequentially-consistent read sees the last preceding write to the
+    same address in this global order. This corresponds to the C++0x/C1x
+    ``memory_order_seq_cst`` and Java volatile.
+
+.. _singlethread:
+
+If an atomic operation is marked ``singlethread``, it only *synchronizes
+with* or participates in modification and seq\_cst total orderings with
+other operations running in the same thread (for example, in signal
+handlers).
+
+.. _fastmath:
+
+Fast-Math Flags
+---------------
+
+LLVM IR floating-point binary ops (:ref:`fadd <i_fadd>`,
+:ref:`fsub <i_fsub>`, :ref:`fmul <i_fmul>`, :ref:`fdiv <i_fdiv>`,
+:ref:`frem <i_frem>`) have the following flags that can set to enable
+otherwise unsafe floating point operations
+
+``nnan``
+   No NaNs - Allow optimizations to assume the arguments and result are not
+   NaN. Such optimizations are required to retain defined behavior over
+   NaNs, but the value of the result is undefined.
+
+``ninf``
+   No Infs - Allow optimizations to assume the arguments and result are not
+   +/-Inf. Such optimizations are required to retain defined behavior over
+   +/-Inf, but the value of the result is undefined.
+
+``nsz``
+   No Signed Zeros - Allow optimizations to treat the sign of a zero
+   argument or result as insignificant.
+
+``arcp``
+   Allow Reciprocal - Allow optimizations to use the reciprocal of an
+   argument rather than perform division.
+
+``fast``
+   Fast - Allow algebraically equivalent transformations that may
+   dramatically change results in floating point (e.g. reassociate). This
+   flag implies all the others.
+
+.. _typesystem:
+
+Type System
+===========
+
+The LLVM type system is one of the most important features of the
+intermediate representation. Being typed enables a number of
+optimizations to be performed on the intermediate representation
+directly, without having to do extra analyses on the side before the
+transformation. A strong type system makes it easier to read the
+generated code and enables novel analyses and transformations that are
+not feasible to perform on normal three address code representations.
+
+Type Classifications
+--------------------
+
+The types fall into a few useful classifications:
+
+
+.. list-table::
+   :header-rows: 1
+
+   * - Classification
+     - Types
+
+   * - :ref:`integer <t_integer>`
+     - ``i1``, ``i2``, ``i3``, ... ``i8``, ... ``i16``, ... ``i32``, ...
+       ``i64``, ...
+
+   * - :ref:`floating point <t_floating>`
+     - ``half``, ``float``, ``double``, ``x86_fp80``, ``fp128``,
+       ``ppc_fp128``
+
+
+   * - first class
+
+       .. _t_firstclass:
+
+     - :ref:`integer <t_integer>`, :ref:`floating point <t_floating>`,
+       :ref:`pointer <t_pointer>`, :ref:`vector <t_vector>`,
+       :ref:`structure <t_struct>`, :ref:`array <t_array>`,
+       :ref:`label <t_label>`, :ref:`metadata <t_metadata>`.
+
+   * - :ref:`primitive <t_primitive>`
+     - :ref:`label <t_label>`,
+       :ref:`void <t_void>`,
+       :ref:`integer <t_integer>`,
+       :ref:`floating point <t_floating>`,
+       :ref:`x86mmx <t_x86mmx>`,
+       :ref:`metadata <t_metadata>`.
+
+   * - :ref:`derived <t_derived>`
+     - :ref:`array <t_array>`,
+       :ref:`function <t_function>`,
+       :ref:`pointer <t_pointer>`,
+       :ref:`structure <t_struct>`,
+       :ref:`vector <t_vector>`,
+       :ref:`opaque <t_opaque>`.
+
+The :ref:`first class <t_firstclass>` types are perhaps the most important.
+Values of these types are the only ones which can be produced by
+instructions.
+
+.. _t_primitive:
+
+Primitive Types
+---------------
+
+The primitive types are the fundamental building blocks of the LLVM
+system.
+
+.. _t_integer:
+
+Integer Type
+^^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The integer type is a very simple type that simply specifies an
+arbitrary bit width for the integer type desired. Any bit width from 1
+bit to 2\ :sup:`23`\ -1 (about 8 million) can be specified.
+
+Syntax:
+"""""""
+
+::
+
+      iN
+
+The number of bits the integer will occupy is specified by the ``N``
+value.
+
+Examples:
+"""""""""
+
++----------------+------------------------------------------------+
+| ``i1``         | a single-bit integer.                          |
++----------------+------------------------------------------------+
+| ``i32``        | a 32-bit integer.                              |
++----------------+------------------------------------------------+
+| ``i1942652``   | a really big integer of over 1 million bits.   |
++----------------+------------------------------------------------+
+
+.. _t_floating:
+
+Floating Point Types
+^^^^^^^^^^^^^^^^^^^^
+
+.. list-table::
+   :header-rows: 1
+
+   * - Type
+     - Description
+
+   * - ``half``
+     - 16-bit floating point value
+
+   * - ``float``
+     - 32-bit floating point value
+
+   * - ``double``
+     - 64-bit floating point value
+
+   * - ``fp128``
+     - 128-bit floating point value (112-bit mantissa)
+
+   * - ``x86_fp80``
+     -  80-bit floating point value (X87)
+
+   * - ``ppc_fp128``
+     - 128-bit floating point value (two 64-bits)
+
+.. _t_x86mmx:
+
+X86mmx Type
+^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The x86mmx type represents a value held in an MMX register on an x86
+machine. The operations allowed on it are quite limited: parameters and
+return values, load and store, and bitcast. User-specified MMX
+instructions are represented as intrinsic or asm calls with arguments
+and/or results of this type. There are no arrays, vectors or constants
+of this type.
+
+Syntax:
+"""""""
+
+::
+
+      x86mmx
+
+.. _t_void:
+
+Void Type
+^^^^^^^^^
+
+Overview:
+"""""""""
+
+The void type does not represent any value and has no size.
+
+Syntax:
+"""""""
+
+::
+
+      void
+
+.. _t_label:
+
+Label Type
+^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The label type represents code labels.
+
+Syntax:
+"""""""
+
+::
+
+      label
+
+.. _t_metadata:
+
+Metadata Type
+^^^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The metadata type represents embedded metadata. No derived types may be
+created from metadata except for :ref:`function <t_function>` arguments.
+
+Syntax:
+"""""""
+
+::
+
+      metadata
+
+.. _t_derived:
+
+Derived Types
+-------------
+
+The real power in LLVM comes from the derived types in the system. This
+is what allows a programmer to represent arrays, functions, pointers,
+and other useful types. Each of these types contain one or more element
+types which may be a primitive type, or another derived type. For
+example, it is possible to have a two dimensional array, using an array
+as the element type of another array.
+
+.. _t_aggregate:
+
+Aggregate Types
+^^^^^^^^^^^^^^^
+
+Aggregate Types are a subset of derived types that can contain multiple
+member types. :ref:`Arrays <t_array>` and :ref:`structs <t_struct>` are
+aggregate types. :ref:`Vectors <t_vector>` are not considered to be
+aggregate types.
+
+.. _t_array:
+
+Array Type
+^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The array type is a very simple derived type that arranges elements
+sequentially in memory. The array type requires a size (number of
+elements) and an underlying data type.
+
+Syntax:
+"""""""
+
+::
+
+      [<# elements> x <elementtype>]
+
+The number of elements is a constant integer value; ``elementtype`` may
+be any type with a size.
+
+Examples:
+"""""""""
+
++------------------+--------------------------------------+
+| ``[40 x i32]``   | Array of 40 32-bit integer values.   |
++------------------+--------------------------------------+
+| ``[41 x i32]``   | Array of 41 32-bit integer values.   |
++------------------+--------------------------------------+
+| ``[4 x i8]``     | Array of 4 8-bit integer values.     |
++------------------+--------------------------------------+
+
+Here are some examples of multidimensional arrays:
+
++-----------------------------+----------------------------------------------------------+
+| ``[3 x [4 x i32]]``         | 3x4 array of 32-bit integer values.                      |
++-----------------------------+----------------------------------------------------------+
+| ``[12 x [10 x float]]``     | 12x10 array of single precision floating point values.   |
++-----------------------------+----------------------------------------------------------+
+| ``[2 x [3 x [4 x i16]]]``   | 2x3x4 array of 16-bit integer values.                    |
++-----------------------------+----------------------------------------------------------+
+
+There is no restriction on indexing beyond the end of the array implied
+by a static type (though there are restrictions on indexing beyond the
+bounds of an allocated object in some cases). This means that
+single-dimension 'variable sized array' addressing can be implemented in
+LLVM with a zero length array type. An implementation of 'pascal style
+arrays' in LLVM could use the type "``{ i32, [0 x float]}``", for
+example.
+
+.. _t_function:
+
+Function Type
+^^^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The function type can be thought of as a function signature. It consists
+of a return type and a list of formal parameter types. The return type
+of a function type is a first class type or a void type.
+
+Syntax:
+"""""""
+
+::
+
+      <returntype> (<parameter list>)
+
+...where '``<parameter list>``' is a comma-separated list of type
+specifiers. Optionally, the parameter list may include a type ``...``,
+which indicates that the function takes a variable number of arguments.
+Variable argument functions can access their arguments with the
+:ref:`variable argument handling intrinsic <int_varargs>` functions.
+'``<returntype>``' is any type except :ref:`label <t_label>`.
+
+Examples:
+"""""""""
+
++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``i32 (i32)``                   | function taking an ``i32``, returning an ``i32``                                                                                                                    |
++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``float (i16, i32 *) *``        | :ref:`Pointer <t_pointer>` to a function that takes an ``i16`` and a :ref:`pointer <t_pointer>` to ``i32``, returning ``float``.                                    |
++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``i32 (i8*, ...)``              | A vararg function that takes at least one :ref:`pointer <t_pointer>` to ``i8`` (char in C), which returns an integer. This is the signature for ``printf`` in LLVM. |
++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``{i32, i32} (i32)``            | A function taking an ``i32``, returning a :ref:`structure <t_struct>` containing two ``i32`` values                                                                 |
++---------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+.. _t_struct:
+
+Structure Type
+^^^^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The structure type is used to represent a collection of data members
+together in memory. The elements of a structure may be any type that has
+a size.
+
+Structures in memory are accessed using '``load``' and '``store``' by
+getting a pointer to a field with the '``getelementptr``' instruction.
+Structures in registers are accessed using the '``extractvalue``' and
+'``insertvalue``' instructions.
+
+Structures may optionally be "packed" structures, which indicate that
+the alignment of the struct is one byte, and that there is no padding
+between the elements. In non-packed structs, padding between field types
+is inserted as defined by the DataLayout string in the module, which is
+required to match what the underlying code generator expects.
+
+Structures can either be "literal" or "identified". A literal structure
+is defined inline with other types (e.g. ``{i32, i32}*``) whereas
+identified types are always defined at the top level with a name.
+Literal types are uniqued by their contents and can never be recursive
+or opaque since there is no way to write one. Identified types can be
+recursive, can be opaqued, and are never uniqued.
+
+Syntax:
+"""""""
+
+::
+
+      %T1 = type { <type list> }     ; Identified normal struct type
+      %T2 = type <{ <type list> }>   ; Identified packed struct type
+
+Examples:
+"""""""""
+
++------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``{ i32, i32, i32 }``        | A triple of three ``i32`` values                                                                                                                                                      |
++------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``{ float, i32 (i32) * }``   | A pair, where the first element is a ``float`` and the second element is a :ref:`pointer <t_pointer>` to a :ref:`function <t_function>` that takes an ``i32``, returning an ``i32``.  |
++------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| ``<{ i8, i32 }>``            | A packed struct known to be 5 bytes in size.                                                                                                                                          |
++------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+
+.. _t_opaque:
+
+Opaque Structure Types
+^^^^^^^^^^^^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+Opaque structure types are used to represent named structure types that
+do not have a body specified. This corresponds (for example) to the C
+notion of a forward declared structure.
+
+Syntax:
+"""""""
+
+::
+
+      %X = type opaque
+      %52 = type opaque
+
+Examples:
+"""""""""
+
++--------------+-------------------+
+| ``opaque``   | An opaque type.   |
++--------------+-------------------+
+
+.. _t_pointer:
+
+Pointer Type
+^^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+The pointer type is used to specify memory locations. Pointers are
+commonly used to reference objects in memory.
+
+Pointer types may have an optional address space attribute defining the
+numbered address space where the pointed-to object resides. The default
+address space is number zero. The semantics of non-zero address spaces
+are target-specific.
+
+Note that LLVM does not permit pointers to void (``void*``) nor does it
+permit pointers to labels (``label*``). Use ``i8*`` instead.
+
+Syntax:
+"""""""
+
+::
+
+      <type> *
+
+Examples:
+"""""""""
+
++-------------------------+--------------------------------------------------------------------------------------------------------------+
+| ``[4 x i32]*``          | A :ref:`pointer <t_pointer>` to :ref:`array <t_array>` of four ``i32`` values.                               |
++-------------------------+--------------------------------------------------------------------------------------------------------------+
+| ``i32 (i32*) *``        | A :ref:`pointer <t_pointer>` to a :ref:`function <t_function>` that takes an ``i32*``, returning an ``i32``. |
++-------------------------+--------------------------------------------------------------------------------------------------------------+
+| ``i32 addrspace(5)*``   | A :ref:`pointer <t_pointer>` to an ``i32`` value that resides in address space #5.                           |
++-------------------------+--------------------------------------------------------------------------------------------------------------+
+
+.. _t_vector:
+
+Vector Type
+^^^^^^^^^^^
+
+Overview:
+"""""""""
+
+A vector type is a simple derived type that represents a vector of
+elements. Vector types are used when multiple primitive data are
+operated in parallel using a single instruction (SIMD). A vector type
+requires a size (number of elements) and an underlying primitive data
+type. Vector types are considered :ref:`first class <t_firstclass>`.
+
+Syntax:
+"""""""
+
+::
+
+      < <# elements> x <elementtype> >
+
+The number of elements is a constant integer value larger than 0;
+elementtype may be any integer or floating point type, or a pointer to
+these types. Vectors of size zero are not allowed.
+
+Examples:
+"""""""""
+
++-------------------+--------------------------------------------------+
+| ``<4 x i32>``     | Vector of 4 32-bit integer values.               |
++-------------------+--------------------------------------------------+
+| ``<8 x float>``   | Vector of 8 32-bit floating-point values.        |
++-------------------+--------------------------------------------------+
+| ``<2 x i64>``     | Vector of 2 64-bit integer values.               |
++-------------------+--------------------------------------------------+
+| ``<4 x i64*>``    | Vector of 4 pointers to 64-bit integer values.   |
++-------------------+--------------------------------------------------+
+
+Constants
+=========
+
+LLVM has several different basic types of constants. This section
+describes them all and their syntax.
+
+Simple Constants
+----------------
+
+**Boolean constants**
+    The two strings '``true``' and '``false``' are both valid constants
+    of the ``i1`` type.
+**Integer constants**
+    Standard integers (such as '4') are constants of the
+    :ref:`integer <t_integer>` type. Negative numbers may be used with
+    integer types.
+**Floating point constants**
+    Floating point constants use standard decimal notation (e.g.
+    123.421), exponential notation (e.g. 1.23421e+2), or a more precise
+    hexadecimal notation (see below). The assembler requires the exact
+    decimal value of a floating-point constant. For example, the
+    assembler accepts 1.25 but rejects 1.3 because 1.3 is a repeating
+    decimal in binary. Floating point constants must have a :ref:`floating
+    point <t_floating>` type.
+**Null pointer constants**
+    The identifier '``null``' is recognized as a null pointer constant
+    and must be of :ref:`pointer type <t_pointer>`.
+
+The one non-intuitive notation for constants is the hexadecimal form of
+floating point constants. For example, the form
+'``double    0x432ff973cafa8000``' is equivalent to (but harder to read
+than) '``double 4.5e+15``'. The only time hexadecimal floating point
+constants are required (and the only time that they are generated by the
+disassembler) is when a floating point constant must be emitted but it
+cannot be represented as a decimal floating point number in a reasonable
+number of digits. For example, NaN's, infinities, and other special
+values are represented in their IEEE hexadecimal format so that assembly
+and disassembly do not cause any bits to change in the constants.
+
+When using the hexadecimal form, constants of types half, float, and
+double are represented using the 16-digit form shown above (which
+matches the IEEE754 representation for double); half and float values
+must, however, be exactly representable as IEE754 half and single
+precision, respectively. Hexadecimal format is always used for long
+double, and there are three forms of long double. The 80-bit format used
+by x86 is represented as ``0xK`` followed by 20 hexadecimal digits. The
+128-bit format used by PowerPC (two adjacent doubles) is represented by
+``0xM`` followed by 32 hexadecimal digits. The IEEE 128-bit format is
+represented by ``0xL`` followed by 32 hexadecimal digits; no currently
+supported target uses this format. Long doubles will only work if they
+match the long double format on your target. The IEEE 16-bit format
+(half precision) is represented by ``0xH`` followed by 4 hexadecimal
+digits. All hexadecimal formats are big-endian (sign bit at the left).
+
+There are no constants of type x86mmx.
+
+Complex Constants
+-----------------
+
+Complex constants are a (potentially recursive) combination of simple
+constants and smaller complex constants.
+
+**Structure constants**
+    Structure constants are represented with notation similar to
+    structure type definitions (a comma separated list of elements,
+    surrounded by braces (``{}``)). For example:
+    "``{ i32 4, float 17.0, i32* @G }``", where "``@G``" is declared as
+    "``@G = external global i32``". Structure constants must have
+    :ref:`structure type <t_struct>`, and the number and types of elements
+    must match those specified by the type.
+**Array constants**
+    Array constants are represented with notation similar to array type
+    definitions (a comma separated list of elements, surrounded by
+    square brackets (``[]``)). For example:
+    "``[ i32 42, i32 11, i32 74 ]``". Array constants must have
+    :ref:`array type <t_array>`, and the number and types of elements must
+    match those specified by the type.
+**Vector constants**
+    Vector constants are represented with notation similar to vector
+    type definitions (a comma separated list of elements, surrounded by
+    less-than/greater-than's (``<>``)). For example:
+    "``< i32 42, i32 11, i32 74, i32 100 >``". Vector constants
+    must have :ref:`vector type <t_vector>`, and the number and types of
+    elements must match those specified by the type.
+**Zero initialization**
+    The string '``zeroinitializer``' can be used to zero initialize a
+    value to zero of *any* type, including scalar and
+    :ref:`aggregate <t_aggregate>` types. This is often used to avoid
+    having to print large zero initializers (e.g. for large arrays) and
+    is always exactly equivalent to using explicit zero initializers.
+**Metadata node**
+    A metadata node is a structure-like constant with :ref:`metadata
+    type <t_metadata>`. For example:
+    "``metadata !{ i32 0, metadata !"test" }``". Unlike other
+    constants that are meant to be interpreted as part of the
+    instruction stream, metadata is a place to attach additional
+    information such as debug info.
+
+Global Variable and Function Addresses
+--------------------------------------
+
+The addresses of :ref:`global variables <globalvars>` and
+:ref:`functions <functionstructure>` are always implicitly valid
+(link-time) constants. These constants are explicitly referenced when
+the :ref:`identifier for the global <identifiers>` is used and always have
+:ref:`pointer <t_pointer>` type. For example, the following is a legal LLVM
+file:
+
+.. code-block:: llvm
+
+    @X = global i32 17
+    @Y = global i32 42
+    @Z = global [2 x i32*] [ i32* @X, i32* @Y ]
+
+.. _undefvalues:
+
+Undefined Values
+----------------
+
+The string '``undef``' can be used anywhere a constant is expected, and
+indicates that the user of the value may receive an unspecified
+bit-pattern. Undefined values may be of any type (other than '``label``'
+or '``void``') and be used anywhere a constant is permitted.
+
+Undefined values are useful because they indicate to the compiler that
+the program is well defined no matter what value is used. This gives the
+compiler more freedom to optimize. Here are some examples of
+(potentially surprising) transformations that are valid (in pseudo IR):
+
+.. code-block:: llvm
+
+      %A = add %X, undef
+      %B = sub %X, undef
+      %C = xor %X, undef
+    Safe:
+      %A = undef
+      %B = undef
+      %C = undef
+
+This is safe because all of the output bits are affected by the undef
+bits. Any output bit can have a zero or one depending on the input bits.
+
+.. code-block:: llvm
+
+      %A = or %X, undef
+      %B = and %X, undef
+    Safe:
+      %A = -1
+      %B = 0
+    Unsafe:
+      %A = undef
+      %B = undef
+
+These logical operations have bits that are not always affected by the
+input. For example, if ``%X`` has a zero bit, then the output of the
+'``and``' operation will always be a zero for that bit, no matter what
+the corresponding bit from the '``undef``' is. As such, it is unsafe to
+optimize or assume that the result of the '``and``' is '``undef``'.
+However, it is safe to assume that all bits of the '``undef``' could be
+0, and optimize the '``and``' to 0. Likewise, it is safe to assume that
+all the bits of the '``undef``' operand to the '``or``' could be set,
+allowing the '``or``' to be folded to -1.
+
+.. code-block:: llvm
+
+      %A = select undef, %X, %Y
+      %B = select undef, 42, %Y
+      %C = select %X, %Y, undef
+    Safe:
+      %A = %X     (or %Y)
+      %B = 42     (or %Y)
+      %C = %Y
+    Unsafe:
+      %A = undef
+      %B = undef
+      %C = undef
+
+This set of examples shows that undefined '``select``' (and conditional
+branch) conditions can go *either way*, but they have to come from one
+of the two operands. In the ``%A`` example, if ``%X`` and ``%Y`` were
+both known to have a clear low bit, then ``%A`` would have to have a
+cleared low bit. However, in the ``%C`` example, the optimizer is
+allowed to assume that the '``undef``' operand could be the same as
+``%Y``, allowing the whole '``select``' to be eliminated.
+
+.. code-block:: llvm
+
+      %A = xor undef, undef
+
+      %B = undef
+      %C = xor %B, %B
+
+      %D = undef
+      %E = icmp lt %D, 4
+      %F = icmp gte %D, 4
+
+    Safe:
+      %A = undef
+      %B = undef
+      %C = undef
+      %D = undef
+      %E = undef
+      %F = undef
+
+This example points out that two '``undef``' operands are not
+necessarily the same. This can be surprising to people (and also matches
+C semantics) where they assume that "``X^X``" is always zero, even if
+``X`` is undefined. This isn't true for a number of reasons, but the
+short answer is that an '``undef``' "variable" can arbitrarily change
+its value over its "live range". This is true because the variable
+doesn't actually *have a live range*. Instead, the value is logically
+read from arbitrary registers that happen to be around when needed, so
+the value is not necessarily consistent over time. In fact, ``%A`` and
+``%C`` need to have the same semantics or the core LLVM "replace all
+uses with" concept would not hold.
+
+.. code-block:: llvm
+
+      %A = fdiv undef, %X
+      %B = fdiv %X, undef
+    Safe:
+      %A = undef
+    b: unreachable
+
+These examples show the crucial difference between an *undefined value*
+and *undefined behavior*. An undefined value (like '``undef``') is
+allowed to have an arbitrary bit-pattern. This means that the ``%A``
+operation can be constant folded to '``undef``', because the '``undef``'
+could be an SNaN, and ``fdiv`` is not (currently) defined on SNaN's.
+However, in the second example, we can make a more aggressive
+assumption: because the ``undef`` is allowed to be an arbitrary value,
+we are allowed to assume that it could be zero. Since a divide by zero
+has *undefined behavior*, we are allowed to assume that the operation
+does not execute at all. This allows us to delete the divide and all
+code after it. Because the undefined operation "can't happen", the
+optimizer can assume that it occurs in dead code.
+
+.. code-block:: llvm
+
+    a:  store undef -> %X
+    b:  store %X -> undef
+    Safe:
+    a: <deleted>
+    b: unreachable
+
+These examples reiterate the ``fdiv`` example: a store *of* an undefined
+value can be assumed to not have any effect; we can assume that the
+value is overwritten with bits that happen to match what was already
+there. However, a store *to* an undefined location could clobber
+arbitrary memory, therefore, it has undefined behavior.
+
+.. _poisonvalues:
+
+Poison Values
+-------------
+
+Poison values are similar to :ref:`undef values <undefvalues>`, however
+they also represent the fact that an instruction or constant expression
+which cannot evoke side effects has nevertheless detected a condition
+which results in undefined behavior.
+
+There is currently no way of representing a poison value in the IR; they
+only exist when produced by operations such as :ref:`add <i_add>` with
+the ``nsw`` flag.
+
+Poison value behavior is defined in terms of value *dependence*:
+
+-  Values other than :ref:`phi <i_phi>` nodes depend on their operands.
+-  :ref:`Phi <i_phi>` nodes depend on the operand corresponding to
+   their dynamic predecessor basic block.
+-  Function arguments depend on the corresponding actual argument values
+   in the dynamic callers of their functions.
+-  :ref:`Call <i_call>` instructions depend on the :ref:`ret <i_ret>`
+   instructions that dynamically transfer control back to them.
+-  :ref:`Invoke <i_invoke>` instructions depend on the
+   :ref:`ret <i_ret>`, :ref:`resume <i_resume>`, or exception-throwing
+   call instructions that dynamically transfer control back to them.
+-  Non-volatile loads and stores depend on the most recent stores to all
+   of the referenced memory addresses, following the order in the IR
+   (including loads and stores implied by intrinsics such as
+   :ref:`@llvm.memcpy <int_memcpy>`.)
+-  An instruction with externally visible side effects depends on the
+   most recent preceding instruction with externally visible side
+   effects, following the order in the IR. (This includes :ref:`volatile
+   operations <volatile>`.)
+-  An instruction *control-depends* on a :ref:`terminator
+   instruction <terminators>` if the terminator instruction has
+   multiple successors and the instruction is always executed when
+   control transfers to one of the successors, and may not be executed
+   when control is transferred to another.
+-  Additionally, an instruction also *control-depends* on a terminator
+   instruction if the set of instructions it otherwise depends on would
+   be different if the terminator had transferred control to a different
+   successor.
+-  Dependence is transitive.
+
+Poison Values have the same behavior as :ref:`undef values <undefvalues>`,
+with the additional affect that any instruction which has a *dependence*
+on a poison value has undefined behavior.
+
+Here are some examples:
+
+.. code-block:: llvm
+
+    entry:
+      %poison = sub nuw i32 0, 1           ; Results in a poison value.
+      %still_poison = and i32 %poison, 0   ; 0, but also poison.
+      %poison_yet_again = getelementptr i32* @h, i32 %still_poison
+      store i32 0, i32* %poison_yet_again  ; memory at @h[0] is poisoned
+
+      store i32 %poison, i32* @g           ; Poison value stored to memory.
+      %poison2 = load i32* @g              ; Poison value loaded back from memory.
+
+      store volatile i32 %poison, i32* @g  ; External observation; undefined behavior.
+
+      %narrowaddr = bitcast i32* @g to i16*
+      %wideaddr = bitcast i32* @g to i64*
+      %poison3 = load i16* %narrowaddr     ; Returns a poison value.
+      %poison4 = load i64* %wideaddr       ; Returns a poison value.
+
+      %cmp = icmp slt i32 %poison, 0       ; Returns a poison value.
+      br i1 %cmp, label %true, label %end  ; Branch to either destination.
+
+    true:
+      store volatile i32 0, i32* @g        ; This is control-dependent on %cmp, so
+                                           ; it has undefined behavior.
+      br label %end
+
+    end:
+      %p = phi i32 [ 0, %entry ], [ 1, %true ]
+                                           ; Both edges into this PHI are
+                                           ; control-dependent on %cmp, so this
+                                           ; always results in a poison value.
+
+      store volatile i32 0, i32* @g        ; This would depend on the store in %true
+                                           ; if %cmp is true, or the store in %entry
+                                           ; otherwise, so this is undefined behavior.
+
+      br i1 %cmp, label %second_true, label %second_end
+                                           ; The same branch again, but this time the
+                                           ; true block doesn't have side effects.
+
+    second_true:
+      ; No side effects!
+      ret void
+
+    second_end:
+      store volatile i32 0, i32* @g        ; This time, the instruction always depends
+                                           ; on the store in %end. Also, it is
+                                           ; control-equivalent to %end, so this is
+                                           ; well-defined (ignoring earlier undefined
+                                           ; behavior in this example).
+
+.. _blockaddress:
+
+Addresses of Basic Blocks
+-------------------------
+
+``blockaddress(@function, %block)``
+
+The '``blockaddress``' constant computes the address of the specified
+basic block in the specified function, and always has an ``i8*`` type.
+Taking the address of the entry block is illegal.
+
+This value only has defined behavior when used as an operand to the
+':ref:`indirectbr <i_indirectbr>`' instruction, or for comparisons
+against null. Pointer equality tests between labels addresses results in
+undefined behavior — though, again, comparison against null is ok, and
+no label is equal to the null pointer. This may be passed around as an
+opaque pointer sized value as long as the bits are not inspected. This
+allows ``ptrtoint`` and arithmetic to be performed on these values so
+long as the original value is reconstituted before the ``indirectbr``
+instruction.
+
+Finally, some targets may provide defined semantics when using the value
+as the operand to an inline assembly, but that is target specific.
+
+Constant Expressions
+--------------------
+
+Constant expressions are used to allow expressions involving other
+constants to be used as constants. Constant expressions may be of any
+:ref:`first class <t_firstclass>` type and may involve any LLVM operation
+that does not have side effects (e.g. load and call are not supported).
+The following is the syntax for constant expressions:
+
+``trunc (CST to TYPE)``
+    Truncate a constant to another type. The bit size of CST must be
+    larger than the bit size of TYPE. Both types must be integers.
+``zext (CST to TYPE)``
+    Zero extend a constant to another type. The bit size of CST must be
+    smaller than the bit size of TYPE. Both types must be integers.
+``sext (CST to TYPE)``
+    Sign extend a constant to another type. The bit size of CST must be
+    smaller than the bit size of TYPE. Both types must be integers.
+``fptrunc (CST to TYPE)``
+    Truncate a floating point constant to another floating point type.
+    The size of CST must be larger than the size of TYPE. Both types
+    must be floating point.
+``fpext (CST to TYPE)``
+    Floating point extend a constant to another type. The size of CST
+    must be smaller or equal to the size of TYPE. Both types must be
+    floating point.
+``fptoui (CST to TYPE)``
+    Convert a floating point constant to the corresponding unsigned
+    integer constant. TYPE must be a scalar or vector integer type. CST
+    must be of scalar or vector floating point type. Both CST and TYPE
+    must be scalars, or vectors of the same number of elements. If the
+    value won't fit in the integer type, the results are undefined.
+``fptosi (CST to TYPE)``
+    Convert a floating point constant to the corresponding signed
+    integer constant. TYPE must be a scalar or vector integer type. CST
+    must be of scalar or vector floating point type. Both CST and TYPE
+    must be scalars, or vectors of the same number of elements. If the
+    value won't fit in the integer type, the results are undefined.
+``uitofp (CST to TYPE)``
+    Convert an unsigned integer constant to the corresponding floating
+    point constant. TYPE must be a scalar or vector floating point type.
+    CST must be of scalar or vector integer type. Both CST and TYPE must
+    be scalars, or vectors of the same number of elements. If the value
+    won't fit in the floating point type, the results are undefined.
+``sitofp (CST to TYPE)``
+    Convert a signed integer constant to the corresponding floating
+    point constant. TYPE must be a scalar or vector floating point type.
+    CST must be of scalar or vector integer type. Both CST and TYPE must
+    be scalars, or vectors of the same number of elements. If the value
+    won't fit in the floating point type, the results are undefined.
+``ptrtoint (CST to TYPE)``
+    Convert a pointer typed constant to the corresponding integer
+    constant ``TYPE`` must be an integer type. ``CST`` must be of
+    pointer type. The ``CST`` value is zero extended, truncated, or
+    unchanged to make it fit in ``TYPE``.
+``inttoptr (CST to TYPE)``
+    Convert an integer constant to a pointer constant. TYPE must be a
+    pointer type. CST must be of integer type. The CST value is zero
+    extended, truncated, or unchanged to make it fit in a pointer size.
+    This one is *really* dangerous!
+``bitcast (CST to TYPE)``
+    Convert a constant, CST, to another TYPE. The constraints of the
+    operands are the same as those for the :ref:`bitcast
+    instruction <i_bitcast>`.
+``getelementptr (CSTPTR, IDX0, IDX1, ...)``, ``getelementptr inbounds (CSTPTR, IDX0, IDX1, ...)``
+    Perform the :ref:`getelementptr operation <i_getelementptr>` on
+    constants. As with the :ref:`getelementptr <i_getelementptr>`
+    instruction, the index list may have zero or more indexes, which are
+    required to make sense for the type of "CSTPTR".
+``select (COND, VAL1, VAL2)``
+    Perform the :ref:`select operation <i_select>` on constants.
+``icmp COND (VAL1, VAL2)``
+    Performs the :ref:`icmp operation <i_icmp>` on constants.
+``fcmp COND (VAL1, VAL2)``
+    Performs the :ref:`fcmp operation <i_fcmp>` on constants.
+``extractelement (VAL, IDX)``
+    Perform the :ref:`extractelement operation <i_extractelement>` on
+    constants.
+``insertelement (VAL, ELT, IDX)``
+    Perform the :ref:`insertelement operation <i_insertelement>` on
+    constants.
+``shufflevector (VEC1, VEC2, IDXMASK)``
+    Perform the :ref:`shufflevector operation <i_shufflevector>` on
+    constants.
+``extractvalue (VAL, IDX0, IDX1, ...)``
+    Perform the :ref:`extractvalue operation <i_extractvalue>` on
+    constants. The index list is interpreted in a similar manner as
+    indices in a ':ref:`getelementptr <i_getelementptr>`' operation. At
+    least one index value must be specified.
+``insertvalue (VAL, ELT, IDX0, IDX1, ...)``
+    Perform the :ref:`insertvalue operation <i_insertvalue>` on constants.
+    The index list is interpreted in a similar manner as indices in a
+    ':ref:`getelementptr <i_getelementptr>`' operation. At least one index
+    value must be specified.
+``OPCODE (LHS, RHS)``
+    Perform the specified operation of the LHS and RHS constants. OPCODE
+    may be any of the :ref:`binary <binaryops>` or :ref:`bitwise
+    binary <bitwiseops>` operations. The constraints on operands are
+    the same as those for the corresponding instruction (e.g. no bitwise
+    operations on floating point values are allowed).
+
+Other Values
+============
+
+Inline Assembler Expressions
+----------------------------
+
+LLVM supports inline assembler expressions (as opposed to :ref:`Module-Level
+Inline Assembly <moduleasm>`) through the use of a special value. This
+value represents the inline assembler as a string (containing the
+instructions to emit), a list of operand constraints (stored as a
+string), a flag that indicates whether or not the inline asm expression
+has side effects, and a flag indicating whether the function containing
+the asm needs to align its stack conservatively. An example inline
+assembler expression is:
+
+.. code-block:: llvm
+
+    i32 (i32) asm "bswap $0", "=r,r"
+
+Inline assembler expressions may **only** be used as the callee operand
+of a :ref:`call <i_call>` or an :ref:`invoke <i_invoke>` instruction.
+Thus, typically we have:
+
+.. code-block:: llvm
+
+    %X = call i32 asm "bswap $0", "=r,r"(i32 %Y)
+
+Inline asms with side effects not visible in the constraint list must be
+marked as having side effects. This is done through the use of the
+'``sideeffect``' keyword, like so:
+
+.. code-block:: llvm
+
+    call void asm sideeffect "eieio", ""()
+
+In some cases inline asms will contain code that will not work unless
+the stack is aligned in some way, such as calls or SSE instructions on
+x86, yet will not contain code that does that alignment within the asm.
+The compiler should make conservative assumptions about what the asm
+might contain and should generate its usual stack alignment code in the
+prologue if the '``alignstack``' keyword is present:
+
+.. code-block:: llvm
+
+    call void asm alignstack "eieio", ""()
+
+Inline asms also support using non-standard assembly dialects. The
+assumed dialect is ATT. When the '``inteldialect``' keyword is present,
+the inline asm is using the Intel dialect. Currently, ATT and Intel are
+the only supported dialects. An example is:
+
+.. code-block:: llvm
+
+    call void asm inteldialect "eieio", ""()
+
+If multiple keywords appear the '``sideeffect``' keyword must come
+first, the '``alignstack``' keyword second and the '``inteldialect``'
+keyword last.
+
+Inline Asm Metadata
+^^^^^^^^^^^^^^^^^^^
+
+The call instructions that wrap inline asm nodes may have a
+"``!srcloc``" MDNode attached to it that contains a list of constant
+integers. If present, the code generator will use the integer as the
+location cookie value when report errors through the ``LLVMContext``
+error reporting mechanisms. This allows a front-end to correlate backend
+errors that occur with inline asm back to the source code that produced
+it. For example:
+
+.. code-block:: llvm
+
+    call void asm sideeffect "something bad", ""(), !srcloc !42
+    ...
+    !42 = !{ i32 1234567 }
+
+It is up to the front-end to make sense of the magic numbers it places
+in the IR. If the MDNode contains multiple constants, the code generator
+will use the one that corresponds to the line of the asm that the error
+occurs on.
+
+.. _metadata:
+
+Metadata Nodes and Metadata Strings
+-----------------------------------
+
+LLVM IR allows metadata to be attached to instructions in the program
+that can convey extra information about the code to the optimizers and
+code generator. One example application of metadata is source-level
+debug information. There are two metadata primitives: strings and nodes.
+All metadata has the ``metadata`` type and is identified in syntax by a
+preceding exclamation point ('``!``').
+
+A metadata string is a string surrounded by double quotes. It can
+contain any character by escaping non-printable characters with
+"``\xx``" where "``xx``" is the two digit hex code. For example:
+"``!"test\00"``".
+
+Metadata nodes are represented with notation similar to structure
+constants (a comma separated list of elements, surrounded by braces and
+preceded by an exclamation point). Metadata nodes can have any values as
+their operand. For example:
+
+.. code-block:: llvm
+
+    !{ metadata !"test\00", i32 10}
+
+A :ref:`named metadata <namedmetadatastructure>` is a collection of
+metadata nodes, which can be looked up in the module symbol table. For
+example:
+
+.. code-block:: llvm
+
+    !foo =  metadata !{!4, !3}
+
+Metadata can be used as function arguments. Here ``llvm.dbg.value``
+function is using two metadata arguments:
+
+.. code-block:: llvm
+
+    call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
+
+Metadata can be attached with an instruction. Here metadata ``!21`` is
+attached to the ``add`` instruction using the ``!dbg`` identifier:
+
+.. code-block:: llvm
+
+    %indvar.next = add i64 %indvar, 1, !dbg !21
+
+More information about specific metadata nodes recognized by the
+optimizers and code generator is found below.
+
+'``tbaa``' Metadata
+^^^^^^^^^^^^^^^^^^^
+
+In LLVM IR, memory does not have types, so LLVM's own type system is not
+suitable for doing TBAA. Instead, metadata is added to the IR to
+describe a type system of a higher level language. This can be used to
+implement typical C/C++ TBAA, but it can also be used to implement
+custom alias analysis behavior for other languages.
+
+The current metadata format is very simple. TBAA metadata nodes have up
+to three fields, e.g.:
+
+.. code-block:: llvm
+
+    !0 = metadata !{ metadata !"an example type tree" }
+    !1 = metadata !{ metadata !"int", metadata !0 }
+    !2 = metadata !{ metadata !"float", metadata !0 }
+    !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+
+The first field is an identity field. It can be any value, usually a
+metadata string, which uniquely identifies the type. The most important
+name in the tree is the name of the root node. Two trees with different
+root node names are entirely disjoint, even if they have leaves with
+common names.
+
+The second field identifies the type's parent node in the tree, or is
+null or omitted for a root node. A type is considered to alias all of
+its descendants and all of its ancestors in the tree. Also, a type is
+considered to alias all types in other trees, so that bitcode produced
+from multiple front-ends is handled conservatively.
+
+If the third field is present, it's an integer which if equal to 1
+indicates that the type is "constant" (meaning
+``pointsToConstantMemory`` should return true; see `other useful
+AliasAnalysis methods <AliasAnalysis.html#OtherItfs>`_).
+
+'``tbaa.struct``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The :ref:`llvm.memcpy <int_memcpy>` is often used to implement
+aggregate assignment operations in C and similar languages, however it
+is defined to copy a contiguous region of memory, which is more than
+strictly necessary for aggregate types which contain holes due to
+padding. Also, it doesn't contain any TBAA information about the fields
+of the aggregate.
+
+``!tbaa.struct`` metadata can describe which memory subregions in a
+memcpy are padding and what the TBAA tags of the struct are.
+
+The current metadata format is very simple. ``!tbaa.struct`` metadata
+nodes are a list of operands which are in conceptual groups of three.
+For each group of three, the first operand gives the byte offset of a
+field in bytes, the second gives its size in bytes, and the third gives
+its tbaa tag. e.g.:
+
+.. code-block:: llvm
+
+    !4 = metadata !{ i64 0, i64 4, metadata !1, i64 8, i64 4, metadata !2 }
+
+This describes a struct with two fields. The first is at offset 0 bytes
+with size 4 bytes, and has tbaa tag !1. The second is at offset 8 bytes
+and has size 4 bytes and has tbaa tag !2.
+
+Note that the fields need not be contiguous. In this example, there is a
+4 byte gap between the two fields. This gap represents padding which
+does not carry useful data and need not be preserved.
+
+'``fpmath``' Metadata
+^^^^^^^^^^^^^^^^^^^^^
+
+``fpmath`` metadata may be attached to any instruction of floating point
+type. It can be used to express the maximum acceptable error in the
+result of that instruction, in ULPs, thus potentially allowing the
+compiler to use a more efficient but less accurate method of computing
+it. ULP is defined as follows:
+
+    If ``x`` is a real number that lies between two finite consecutive
+    floating-point numbers ``a`` and ``b``, without being equal to one
+    of them, then ``ulp(x) = |b - a|``, otherwise ``ulp(x)`` is the
+    distance between the two non-equal finite floating-point numbers
+    nearest ``x``. Moreover, ``ulp(NaN)`` is ``NaN``.
+
+The metadata node shall consist of a single positive floating point
+number representing the maximum relative error, for example:
+
+.. code-block:: llvm
+
+    !0 = metadata !{ float 2.5 } ; maximum acceptable inaccuracy is 2.5 ULPs
+
+'``range``' Metadata
+^^^^^^^^^^^^^^^^^^^^
+
+``range`` metadata may be attached only to loads of integer types. It
+expresses the possible ranges the loaded value is in. The ranges are
+represented with a flattened list of integers. The loaded value is known
+to be in the union of the ranges defined by each consecutive pair. Each
+pair has the following properties:
+
+-  The type must match the type loaded by the instruction.
+-  The pair ``a,b`` represents the range ``[a,b)``.
+-  Both ``a`` and ``b`` are constants.
+-  The range is allowed to wrap.
+-  The range should not represent the full or empty set. That is,
+   ``a!=b``.
+
+In addition, the pairs must be in signed order of the lower bound and
+they must be non-contiguous.
+
+Examples:
+
+.. code-block:: llvm
+
+      %a = load i8* %x, align 1, !range !0 ; Can only be 0 or 1
+      %b = load i8* %y, align 1, !range !1 ; Can only be 255 (-1), 0 or 1
+      %c = load i8* %z, align 1, !range !2 ; Can only be 0, 1, 3, 4 or 5
+      %d = load i8* %z, align 1, !range !3 ; Can only be -2, -1, 3, 4 or 5
+    ...
+    !0 = metadata !{ i8 0, i8 2 }
+    !1 = metadata !{ i8 255, i8 2 }
+    !2 = metadata !{ i8 0, i8 2, i8 3, i8 6 }
+    !3 = metadata !{ i8 -2, i8 0, i8 3, i8 6 }
+
+Module Flags Metadata
+=====================
+
+Information about the module as a whole is difficult to convey to LLVM's
+subsystems. The LLVM IR isn't sufficient to transmit this information.
+The ``llvm.module.flags`` named metadata exists in order to facilitate
+this. These flags are in the form of key / value pairs — much like a
+dictionary — making it easy for any subsystem who cares about a flag to
+look it up.
+
+The ``llvm.module.flags`` metadata contains a list of metadata triplets.
+Each triplet has the following form:
+
+-  The first element is a *behavior* flag, which specifies the behavior
+   when two (or more) modules are merged together, and it encounters two
+   (or more) metadata with the same ID. The supported behaviors are
+   described below.
+-  The second element is a metadata string that is a unique ID for the
+   metadata. How each ID is interpreted is documented below.
+-  The third element is the value of the flag.
+
+When two (or more) modules are merged together, the resulting
+``llvm.module.flags`` metadata is the union of the modules'
+``llvm.module.flags`` metadata. The only exception being a flag with the
+*Override* behavior, which may override another flag's value (see
+below).
+
+The following behaviors are supported:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 10 90
+
+   * - Value
+     - Behavior
+
+   * - 1
+     - **Error**
+           Emits an error if two values disagree. It is an error to have an
+           ID with both an Error and a Warning behavior.
+
+   * - 2
+     - **Warning**
+           Emits a warning if two values disagree.
+
+   * - 3
+     - **Require**
+           Emits an error when the specified value is not present or doesn't
+           have the specified value. It is an error for two (or more)
+           ``llvm.module.flags`` with the same ID to have the Require behavior
+           but different values. There may be multiple Require flags per ID.
+
+   * - 4
+     - **Override**
+           Uses the specified value if the two values disagree. It is an
+           error for two (or more) ``llvm.module.flags`` with the same ID
+           to have the Override behavior but different values.
+
+An example of module flags:
+
+.. code-block:: llvm
+
+    !0 = metadata !{ i32 1, metadata !"foo", i32 1 }
+    !1 = metadata !{ i32 4, metadata !"bar", i32 37 }
+    !2 = metadata !{ i32 2, metadata !"qux", i32 42 }
+    !3 = metadata !{ i32 3, metadata !"qux",
+      metadata !{
+        metadata !"foo", i32 1
+      }
+    }
+    !llvm.module.flags = !{ !0, !1, !2, !3 }
+
+-  Metadata ``!0`` has the ID ``!"foo"`` and the value '1'. The behavior
+   if two or more ``!"foo"`` flags are seen is to emit an error if their
+   values are not equal.
+
+-  Metadata ``!1`` has the ID ``!"bar"`` and the value '37'. The
+   behavior if two or more ``!"bar"`` flags are seen is to use the value
+   '37' if their values are not equal.
+
+-  Metadata ``!2`` has the ID ``!"qux"`` and the value '42'. The
+   behavior if two or more ``!"qux"`` flags are seen is to emit a
+   warning if their values are not equal.
+
+-  Metadata ``!3`` has the ID ``!"qux"`` and the value:
+
+   ::
+
+       metadata !{ metadata !"foo", i32 1 }
+
+   The behavior is to emit an error if the ``llvm.module.flags`` does
+   not contain a flag with the ID ``!"foo"`` that has the value '1'. If
+   two or more ``!"qux"`` flags exist, then they must have the same
+   value or an error will be issued.
+
+Objective-C Garbage Collection Module Flags Metadata
+----------------------------------------------------
+
+On the Mach-O platform, Objective-C stores metadata about garbage
+collection in a special section called "image info". The metadata
+consists of a version number and a bitmask specifying what types of
+garbage collection are supported (if any) by the file. If two or more
+modules are linked together their garbage collection metadata needs to
+be merged rather than appended together.
+
+The Objective-C garbage collection module flags metadata consists of the
+following key-value pairs:
+
+.. list-table::
+   :header-rows: 1
+   :widths: 30 70
+
+   * - Key
+     - Value
+
+   * - ``Objective-C Version``
+     - **[Required]** — The Objective-C ABI version. Valid values are 1 and 2.
+
+   * - ``Objective-C Image Info Version``
+     - **[Required]** — The version of the image info section. Currently
+       always 0.
+
+   * - ``Objective-C Image Info Section``
+     - **[Required]** — The section to place the metadata. Valid values are
+       ``"__OBJC, __image_info, regular"`` for Objective-C ABI version 1, and
+       ``"__DATA,__objc_imageinfo, regular, no_dead_strip"`` for
+       Objective-C ABI version 2.
+
+   * - ``Objective-C Garbage Collection``
+     - **[Required]** — Specifies whether garbage collection is supported or
+       not. Valid values are 0, for no garbage collection, and 2, for garbage
+       collection supported.
+
+   * - ``Objective-C GC Only``
+     - **[Optional]** — Specifies that only garbage collection is supported.
+       If present, its value must be 6. This flag requires that the
+       ``Objective-C Garbage Collection`` flag have the value 2.
+
+Some important flag interactions:
+
+-  If a module with ``Objective-C Garbage Collection`` set to 0 is
+   merged with a module with ``Objective-C Garbage Collection`` set to
+   2, then the resulting module has the
+   ``Objective-C Garbage Collection`` flag set to 0.
+-  A module with ``Objective-C Garbage Collection`` set to 0 cannot be
+   merged with a module with ``Objective-C GC Only`` set to 6.
+
+Intrinsic Global Variables
+==========================
+
+LLVM has a number of "magic" global variables that contain data that
+affect code generation or other IR semantics. These are documented here.
+All globals of this sort should have a section specified as
+"``llvm.metadata``". This section and all globals that start with
+"``llvm.``" are reserved for use by LLVM.
+
+The '``llvm.used``' Global Variable
+-----------------------------------
+
+The ``@llvm.used`` global is an array with i8\* element type which has
+:ref:`appending linkage <linkage_appending>`. This array contains a list of
+pointers to global variables and functions which may optionally have a
+pointer cast formed of bitcast or getelementptr. For example, a legal
+use of it is:
+
+.. code-block:: llvm
+
+    @X = global i8 4
+    @Y = global i32 123
+
+    @llvm.used = appending global [2 x i8*] [
+       i8* @X,
+       i8* bitcast (i32* @Y to i8*)
+    ], section "llvm.metadata"
+
+If a global variable appears in the ``@llvm.used`` list, then the
+compiler, assembler, and linker are required to treat the symbol as if
+there is a reference to the global that it cannot see. For example, if a
+variable has internal linkage and no references other than that from the
+``@llvm.used`` list, it cannot be deleted. This is commonly used to
+represent references from inline asms and other things the compiler
+cannot "see", and corresponds to "``attribute((used))``" in GNU C.
+
+On some targets, the code generator must emit a directive to the
+assembler or object file to prevent the assembler and linker from
+molesting the symbol.
+
+The '``llvm.compiler.used``' Global Variable
+--------------------------------------------
+
+The ``@llvm.compiler.used`` directive is the same as the ``@llvm.used``
+directive, except that it only prevents the compiler from touching the
+symbol. On targets that support it, this allows an intelligent linker to
+optimize references to the symbol without being impeded as it would be
+by ``@llvm.used``.
+
+This is a rare construct that should only be used in rare circumstances,
+and should not be exposed to source languages.
+
+The '``llvm.global_ctors``' Global Variable
+-------------------------------------------
+
+.. code-block:: llvm
+
+    %0 = type { i32, void ()* }
+    @llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
+
+The ``@llvm.global_ctors`` array contains a list of constructor
+functions and associated priorities. The functions referenced by this
+array will be called in ascending order of priority (i.e. lowest first)
+when the module is loaded. The order of functions with the same priority
+is not defined.
+
+The '``llvm.global_dtors``' Global Variable
+-------------------------------------------
+
+.. code-block:: llvm
+
+    %0 = type { i32, void ()* }
+    @llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
+
+The ``@llvm.global_dtors`` array contains a list of destructor functions
+and associated priorities. The functions referenced by this array will
+be called in descending order of priority (i.e. highest first) when the
+module is loaded. The order of functions with the same priority is not
+defined.
+
+Instruction Reference
+=====================
+
+The LLVM instruction set consists of several different classifications
+of instructions: :ref:`terminator instructions <terminators>`, :ref:`binary
+instructions <binaryops>`, :ref:`bitwise binary
+instructions <bitwiseops>`, :ref:`memory instructions <memoryops>`, and
+:ref:`other instructions <otherops>`.
+
+.. _terminators:
+
+Terminator Instructions
+-----------------------
+
+As mentioned :ref:`previously <functionstructure>`, every basic block in a
+program ends with a "Terminator" instruction, which indicates which
+block should be executed after the current block is finished. These
+terminator instructions typically yield a '``void``' value: they produce
+control flow, not values (the one exception being the
+':ref:`invoke <i_invoke>`' instruction).
+
+The terminator instructions are: ':ref:`ret <i_ret>`',
+':ref:`br <i_br>`', ':ref:`switch <i_switch>`',
+':ref:`indirectbr <i_indirectbr>`', ':ref:`invoke <i_invoke>`',
+':ref:`resume <i_resume>`', and ':ref:`unreachable <i_unreachable>`'.
+
+.. _i_ret:
+
+'``ret``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      ret <type> <value>       ; Return a value from a non-void function
+      ret void                 ; Return from void function
+
+Overview:
+"""""""""
+
+The '``ret``' instruction is used to return control flow (and optionally
+a value) from a function back to the caller.
+
+There are two forms of the '``ret``' instruction: one that returns a
+value and then causes control flow, and one that just causes control
+flow to occur.
+
+Arguments:
+""""""""""
+
+The '``ret``' instruction optionally accepts a single argument, the
+return value. The type of the return value must be a ':ref:`first
+class <t_firstclass>`' type.
+
+A function is not :ref:`well formed <wellformed>` if it it has a non-void
+return type and contains a '``ret``' instruction with no return value or
+a return value with a type that does not match its type, or if it has a
+void return type and contains a '``ret``' instruction with a return
+value.
+
+Semantics:
+""""""""""
+
+When the '``ret``' instruction is executed, control flow returns back to
+the calling function's context. If the caller is a
+":ref:`call <i_call>`" instruction, execution continues at the
+instruction after the call. If the caller was an
+":ref:`invoke <i_invoke>`" instruction, execution continues at the
+beginning of the "normal" destination block. If the instruction returns
+a value, that value shall set the call or invoke instruction's return
+value.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      ret i32 5                       ; Return an integer value of 5
+      ret void                        ; Return from a void function
+      ret { i32, i8 } { i32 4, i8 2 } ; Return a struct of values 4 and 2
+
+.. _i_br:
+
+'``br``' Instruction
+^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      br i1 <cond>, label <iftrue>, label <iffalse>
+      br label <dest>          ; Unconditional branch
+
+Overview:
+"""""""""
+
+The '``br``' instruction is used to cause control flow to transfer to a
+different basic block in the current function. There are two forms of
+this instruction, corresponding to a conditional branch and an
+unconditional branch.
+
+Arguments:
+""""""""""
+
+The conditional branch form of the '``br``' instruction takes a single
+'``i1``' value and two '``label``' values. The unconditional form of the
+'``br``' instruction takes a single '``label``' value as a target.
+
+Semantics:
+""""""""""
+
+Upon execution of a conditional '``br``' instruction, the '``i1``'
+argument is evaluated. If the value is ``true``, control flows to the
+'``iftrue``' ``label`` argument. If "cond" is ``false``, control flows
+to the '``iffalse``' ``label`` argument.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+    Test:
+      %cond = icmp eq i32 %a, %b
+      br i1 %cond, label %IfEqual, label %IfUnequal
+    IfEqual:
+      ret i32 1
+    IfUnequal:
+      ret i32 0
+
+.. _i_switch:
+
+'``switch``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      switch <intty> <value>, label <defaultdest> [ <intty> <val>, label <dest> ... ]
+
+Overview:
+"""""""""
+
+The '``switch``' instruction is used to transfer control flow to one of
+several different places. It is a generalization of the '``br``'
+instruction, allowing a branch to occur to one of many possible
+destinations.
+
+Arguments:
+""""""""""
+
+The '``switch``' instruction uses three parameters: an integer
+comparison value '``value``', a default '``label``' destination, and an
+array of pairs of comparison value constants and '``label``'s. The table
+is not allowed to contain duplicate constant entries.
+
+Semantics:
+""""""""""
+
+The ``switch`` instruction specifies a table of values and destinations.
+When the '``switch``' instruction is executed, this table is searched
+for the given value. If the value is found, control flow is transferred
+to the corresponding destination; otherwise, control flow is transferred
+to the default destination.
+
+Implementation:
+"""""""""""""""
+
+Depending on properties of the target machine and the particular
+``switch`` instruction, this instruction may be code generated in
+different ways. For example, it could be generated as a series of
+chained conditional branches or with a lookup table.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+     ; Emulate a conditional br instruction
+     %Val = zext i1 %value to i32
+     switch i32 %Val, label %truedest [ i32 0, label %falsedest ]
+
+     ; Emulate an unconditional br instruction
+     switch i32 0, label %dest [ ]
+
+     ; Implement a jump table:
+     switch i32 %val, label %otherwise [ i32 0, label %onzero
+                                         i32 1, label %onone
+                                         i32 2, label %ontwo ]
+
+.. _i_indirectbr:
+
+'``indirectbr``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      indirectbr <somety>* <address>, [ label <dest1>, label <dest2>, ... ]
+
+Overview:
+"""""""""
+
+The '``indirectbr``' instruction implements an indirect branch to a
+label within the current function, whose address is specified by
+"``address``". Address must be derived from a
+:ref:`blockaddress <blockaddress>` constant.
+
+Arguments:
+""""""""""
+
+The '``address``' argument is the address of the label to jump to. The
+rest of the arguments indicate the full set of possible destinations
+that the address may point to. Blocks are allowed to occur multiple
+times in the destination list, though this isn't particularly useful.
+
+This destination list is required so that dataflow analysis has an
+accurate understanding of the CFG.
+
+Semantics:
+""""""""""
+
+Control transfers to the block specified in the address argument. All
+possible destination blocks must be listed in the label list, otherwise
+this instruction has undefined behavior. This implies that jumps to
+labels defined in other functions have undefined behavior as well.
+
+Implementation:
+"""""""""""""""
+
+This is typically implemented with a jump through a register.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+     indirectbr i8* %Addr, [ label %bb1, label %bb2, label %bb3 ]
+
+.. _i_invoke:
+
+'``invoke``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = invoke [cconv] [ret attrs] <ptr to function ty> <function ptr val>(<function args>) [fn attrs]
+                    to label <normal label> unwind label <exception label>
+
+Overview:
+"""""""""
+
+The '``invoke``' instruction causes control to transfer to a specified
+function, with the possibility of control flow transfer to either the
+'``normal``' label or the '``exception``' label. If the callee function
+returns with the "``ret``" instruction, control flow will return to the
+"normal" label. If the callee (or any indirect callees) returns via the
+":ref:`resume <i_resume>`" instruction or other exception handling
+mechanism, control is interrupted and continued at the dynamically
+nearest "exception" label.
+
+The '``exception``' label is a `landing
+pad <ExceptionHandling.html#overview>`_ for the exception. As such,
+'``exception``' label is required to have the
+":ref:`landingpad <i_landingpad>`" instruction, which contains the
+information about the behavior of the program after unwinding happens,
+as its first non-PHI instruction. The restrictions on the
+"``landingpad``" instruction's tightly couples it to the "``invoke``"
+instruction, so that the important information contained within the
+"``landingpad``" instruction can't be lost through normal code motion.
+
+Arguments:
+""""""""""
+
+This instruction requires several arguments:
+
+#. The optional "cconv" marker indicates which :ref:`calling
+   convention <callingconv>` the call should use. If none is
+   specified, the call defaults to using C calling conventions.
+#. The optional :ref:`Parameter Attributes <paramattrs>` list for return
+   values. Only '``zeroext``', '``signext``', and '``inreg``' attributes
+   are valid here.
+#. '``ptr to function ty``': shall be the signature of the pointer to
+   function value being invoked. In most cases, this is a direct
+   function invocation, but indirect ``invoke``'s are just as possible,
+   branching off an arbitrary pointer to function value.
+#. '``function ptr val``': An LLVM value containing a pointer to a
+   function to be invoked.
+#. '``function args``': argument list whose types match the function
+   signature argument types and parameter attributes. All arguments must
+   be of :ref:`first class <t_firstclass>` type. If the function signature
+   indicates the function accepts a variable number of arguments, the
+   extra arguments can be specified.
+#. '``normal label``': the label reached when the called function
+   executes a '``ret``' instruction.
+#. '``exception label``': the label reached when a callee returns via
+   the :ref:`resume <i_resume>` instruction or other exception handling
+   mechanism.
+#. The optional :ref:`function attributes <fnattrs>` list. Only
+   '``noreturn``', '``nounwind``', '``readonly``' and '``readnone``'
+   attributes are valid here.
+
+Semantics:
+""""""""""
+
+This instruction is designed to operate as a standard '``call``'
+instruction in most regards. The primary difference is that it
+establishes an association with a label, which is used by the runtime
+library to unwind the stack.
+
+This instruction is used in languages with destructors to ensure that
+proper cleanup is performed in the case of either a ``longjmp`` or a
+thrown exception. Additionally, this is important for implementation of
+'``catch``' clauses in high-level languages that support them.
+
+For the purposes of the SSA form, the definition of the value returned
+by the '``invoke``' instruction is deemed to occur on the edge from the
+current block to the "normal" label. If the callee unwinds then no
+return value is available.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %retval = invoke i32 @Test(i32 15) to label %Continue
+                  unwind label %TestCleanup              ; {i32}:retval set
+      %retval = invoke coldcc i32 %Testfnptr(i32 15) to label %Continue
+                  unwind label %TestCleanup              ; {i32}:retval set
+
+.. _i_resume:
+
+'``resume``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      resume <type> <value>
+
+Overview:
+"""""""""
+
+The '``resume``' instruction is a terminator instruction that has no
+successors.
+
+Arguments:
+""""""""""
+
+The '``resume``' instruction requires one argument, which must have the
+same type as the result of any '``landingpad``' instruction in the same
+function.
+
+Semantics:
+""""""""""
+
+The '``resume``' instruction resumes propagation of an existing
+(in-flight) exception whose unwinding was interrupted with a
+:ref:`landingpad <i_landingpad>` instruction.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      resume { i8*, i32 } %exn
+
+.. _i_unreachable:
+
+'``unreachable``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      unreachable
+
+Overview:
+"""""""""
+
+The '``unreachable``' instruction has no defined semantics. This
+instruction is used to inform the optimizer that a particular portion of
+the code is not reachable. This can be used to indicate that the code
+after a no-return function cannot be reached, and other facts.
+
+Semantics:
+""""""""""
+
+The '``unreachable``' instruction has no defined semantics.
+
+.. _binaryops:
+
+Binary Operations
+-----------------
+
+Binary operators are used to do most of the computation in a program.
+They require two operands of the same type, execute an operation on
+them, and produce a single value. The operands might represent multiple
+data, as is the case with the :ref:`vector <t_vector>` data type. The
+result value has the same type as its operands.
+
+There are several different binary operators:
+
+.. _i_add:
+
+'``add``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = add <ty> <op1>, <op2>          ; yields {ty}:result
+      <result> = add nuw <ty> <op1>, <op2>      ; yields {ty}:result
+      <result> = add nsw <ty> <op1>, <op2>      ; yields {ty}:result
+      <result> = add nuw nsw <ty> <op1>, <op2>  ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``add``' instruction returns the sum of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``add``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the integer sum of the two operands.
+
+If the sum has unsigned overflow, the result returned is the
+mathematical result modulo 2\ :sup:`n`\ , where n is the bit width of
+the result.
+
+Because LLVM integers use a two's complement representation, this
+instruction is appropriate for both signed and unsigned integers.
+
+``nuw`` and ``nsw`` stand for "No Unsigned Wrap" and "No Signed Wrap",
+respectively. If the ``nuw`` and/or ``nsw`` keywords are present, the
+result value of the ``add`` is a :ref:`poison value <poisonvalues>` if
+unsigned and/or signed overflow, respectively, occurs.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = add i32 4, %var          ; yields {i32}:result = 4 + %var
+
+.. _i_fadd:
+
+'``fadd``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = fadd [fast-math flags]* <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``fadd``' instruction returns the sum of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``fadd``' instruction must be :ref:`floating
+point <t_floating>` or :ref:`vector <t_vector>` of floating point values.
+Both arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the floating point sum of the two operands. This
+instruction can also take any number of :ref:`fast-math flags <fastmath>`,
+which are optimization hints to enable otherwise unsafe floating point
+optimizations:
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = fadd float 4.0, %var          ; yields {float}:result = 4.0 + %var
+
+'``sub``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = sub <ty> <op1>, <op2>          ; yields {ty}:result
+      <result> = sub nuw <ty> <op1>, <op2>      ; yields {ty}:result
+      <result> = sub nsw <ty> <op1>, <op2>      ; yields {ty}:result
+      <result> = sub nuw nsw <ty> <op1>, <op2>  ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``sub``' instruction returns the difference of its two operands.
+
+Note that the '``sub``' instruction is used to represent the '``neg``'
+instruction present in most other intermediate representations.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``sub``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the integer difference of the two operands.
+
+If the difference has unsigned overflow, the result returned is the
+mathematical result modulo 2\ :sup:`n`\ , where n is the bit width of
+the result.
+
+Because LLVM integers use a two's complement representation, this
+instruction is appropriate for both signed and unsigned integers.
+
+``nuw`` and ``nsw`` stand for "No Unsigned Wrap" and "No Signed Wrap",
+respectively. If the ``nuw`` and/or ``nsw`` keywords are present, the
+result value of the ``sub`` is a :ref:`poison value <poisonvalues>` if
+unsigned and/or signed overflow, respectively, occurs.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = sub i32 4, %var          ; yields {i32}:result = 4 - %var
+      <result> = sub i32 0, %val          ; yields {i32}:result = -%var
+
+.. _i_fsub:
+
+'``fsub``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = fsub [fast-math flags]* <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``fsub``' instruction returns the difference of its two operands.
+
+Note that the '``fsub``' instruction is used to represent the '``fneg``'
+instruction present in most other intermediate representations.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``fsub``' instruction must be :ref:`floating
+point <t_floating>` or :ref:`vector <t_vector>` of floating point values.
+Both arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the floating point difference of the two operands.
+This instruction can also take any number of :ref:`fast-math
+flags <fastmath>`, which are optimization hints to enable otherwise
+unsafe floating point optimizations:
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = fsub float 4.0, %var           ; yields {float}:result = 4.0 - %var
+      <result> = fsub float -0.0, %val          ; yields {float}:result = -%var
+
+'``mul``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = mul <ty> <op1>, <op2>          ; yields {ty}:result
+      <result> = mul nuw <ty> <op1>, <op2>      ; yields {ty}:result
+      <result> = mul nsw <ty> <op1>, <op2>      ; yields {ty}:result
+      <result> = mul nuw nsw <ty> <op1>, <op2>  ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``mul``' instruction returns the product of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``mul``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the integer product of the two operands.
+
+If the result of the multiplication has unsigned overflow, the result
+returned is the mathematical result modulo 2\ :sup:`n`\ , where n is the
+bit width of the result.
+
+Because LLVM integers use a two's complement representation, and the
+result is the same width as the operands, this instruction returns the
+correct result for both signed and unsigned integers. If a full product
+(e.g. ``i32`` * ``i32`` -> ``i64``) is needed, the operands should be
+sign-extended or zero-extended as appropriate to the width of the full
+product.
+
+``nuw`` and ``nsw`` stand for "No Unsigned Wrap" and "No Signed Wrap",
+respectively. If the ``nuw`` and/or ``nsw`` keywords are present, the
+result value of the ``mul`` is a :ref:`poison value <poisonvalues>` if
+unsigned and/or signed overflow, respectively, occurs.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = mul i32 4, %var          ; yields {i32}:result = 4 * %var
+
+.. _i_fmul:
+
+'``fmul``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = fmul [fast-math flags]* <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``fmul``' instruction returns the product of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``fmul``' instruction must be :ref:`floating
+point <t_floating>` or :ref:`vector <t_vector>` of floating point values.
+Both arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the floating point product of the two operands.
+This instruction can also take any number of :ref:`fast-math
+flags <fastmath>`, which are optimization hints to enable otherwise
+unsafe floating point optimizations:
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = fmul float 4.0, %var          ; yields {float}:result = 4.0 * %var
+
+'``udiv``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = udiv <ty> <op1>, <op2>         ; yields {ty}:result
+      <result> = udiv exact <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``udiv``' instruction returns the quotient of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``udiv``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the unsigned integer quotient of the two operands.
+
+Note that unsigned integer division and signed integer division are
+distinct operations; for signed integer division, use '``sdiv``'.
+
+Division by zero leads to undefined behavior.
+
+If the ``exact`` keyword is present, the result value of the ``udiv`` is
+a :ref:`poison value <poisonvalues>` if %op1 is not a multiple of %op2 (as
+such, "((a udiv exact b) mul b) == a").
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = udiv i32 4, %var          ; yields {i32}:result = 4 / %var
+
+'``sdiv``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = sdiv <ty> <op1>, <op2>         ; yields {ty}:result
+      <result> = sdiv exact <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``sdiv``' instruction returns the quotient of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``sdiv``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the signed integer quotient of the two operands
+rounded towards zero.
+
+Note that signed integer division and unsigned integer division are
+distinct operations; for unsigned integer division, use '``udiv``'.
+
+Division by zero leads to undefined behavior. Overflow also leads to
+undefined behavior; this is a rare case, but can occur, for example, by
+doing a 32-bit division of -2147483648 by -1.
+
+If the ``exact`` keyword is present, the result value of the ``sdiv`` is
+a :ref:`poison value <poisonvalues>` if the result would be rounded.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = sdiv i32 4, %var          ; yields {i32}:result = 4 / %var
+
+.. _i_fdiv:
+
+'``fdiv``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = fdiv [fast-math flags]* <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``fdiv``' instruction returns the quotient of its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``fdiv``' instruction must be :ref:`floating
+point <t_floating>` or :ref:`vector <t_vector>` of floating point values.
+Both arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The value produced is the floating point quotient of the two operands.
+This instruction can also take any number of :ref:`fast-math
+flags <fastmath>`, which are optimization hints to enable otherwise
+unsafe floating point optimizations:
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = fdiv float 4.0, %var          ; yields {float}:result = 4.0 / %var
+
+'``urem``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = urem <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``urem``' instruction returns the remainder from the unsigned
+division of its two arguments.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``urem``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+This instruction returns the unsigned integer *remainder* of a division.
+This instruction always performs an unsigned division to get the
+remainder.
+
+Note that unsigned integer remainder and signed integer remainder are
+distinct operations; for signed integer remainder, use '``srem``'.
+
+Taking the remainder of a division by zero leads to undefined behavior.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = urem i32 4, %var          ; yields {i32}:result = 4 % %var
+
+'``srem``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = srem <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``srem``' instruction returns the remainder from the signed
+division of its two operands. This instruction can also take
+:ref:`vector <t_vector>` versions of the values in which case the elements
+must be integers.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``srem``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+This instruction returns the *remainder* of a division (where the result
+is either zero or has the same sign as the dividend, ``op1``), not the
+*modulo* operator (where the result is either zero or has the same sign
+as the divisor, ``op2``) of a value. For more information about the
+difference, see `The Math
+Forum <http://mathforum.org/dr.math/problems/anne.4.28.99.html>`_. For a
+table of how this is implemented in various languages, please see
+`Wikipedia: modulo
+operation <http://en.wikipedia.org/wiki/Modulo_operation>`_.
+
+Note that signed integer remainder and unsigned integer remainder are
+distinct operations; for unsigned integer remainder, use '``urem``'.
+
+Taking the remainder of a division by zero leads to undefined behavior.
+Overflow also leads to undefined behavior; this is a rare case, but can
+occur, for example, by taking the remainder of a 32-bit division of
+-2147483648 by -1. (The remainder doesn't actually overflow, but this
+rule lets srem be implemented using instructions that return both the
+result of the division and the remainder.)
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = srem i32 4, %var          ; yields {i32}:result = 4 % %var
+
+.. _i_frem:
+
+'``frem``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = frem [fast-math flags]* <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``frem``' instruction returns the remainder from the division of
+its two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``frem``' instruction must be :ref:`floating
+point <t_floating>` or :ref:`vector <t_vector>` of floating point values.
+Both arguments must have identical types.
+
+Semantics:
+""""""""""
+
+This instruction returns the *remainder* of a division. The remainder
+has the same sign as the dividend. This instruction can also take any
+number of :ref:`fast-math flags <fastmath>`, which are optimization hints
+to enable otherwise unsafe floating point optimizations:
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = frem float 4.0, %var          ; yields {float}:result = 4.0 % %var
+
+.. _bitwiseops:
+
+Bitwise Binary Operations
+-------------------------
+
+Bitwise binary operators are used to do various forms of bit-twiddling
+in a program. They are generally very efficient instructions and can
+commonly be strength reduced from other instructions. They require two
+operands of the same type, execute an operation on them, and produce a
+single value. The resulting value is the same type as its operands.
+
+'``shl``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = shl <ty> <op1>, <op2>           ; yields {ty}:result
+      <result> = shl nuw <ty> <op1>, <op2>       ; yields {ty}:result
+      <result> = shl nsw <ty> <op1>, <op2>       ; yields {ty}:result
+      <result> = shl nuw nsw <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``shl``' instruction returns the first operand shifted to the left
+a specified number of bits.
+
+Arguments:
+""""""""""
+
+Both arguments to the '``shl``' instruction must be the same
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer type.
+'``op2``' is treated as an unsigned value.
+
+Semantics:
+""""""""""
+
+The value produced is ``op1`` \* 2\ :sup:`op2` mod 2\ :sup:`n`,
+where ``n`` is the width of the result. If ``op2`` is (statically or
+dynamically) negative or equal to or larger than the number of bits in
+``op1``, the result is undefined. If the arguments are vectors, each
+vector element of ``op1`` is shifted by the corresponding shift amount
+in ``op2``.
+
+If the ``nuw`` keyword is present, then the shift produces a :ref:`poison
+value <poisonvalues>` if it shifts out any non-zero bits. If the
+``nsw`` keyword is present, then the shift produces a :ref:`poison
+value <poisonvalues>` if it shifts out any bits that disagree with the
+resultant sign bit. As such, NUW/NSW have the same semantics as they
+would if the shift were expressed as a mul instruction with the same
+nsw/nuw bits in (mul %op1, (shl 1, %op2)).
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = shl i32 4, %var   ; yields {i32}: 4 << %var
+      <result> = shl i32 4, 2      ; yields {i32}: 16
+      <result> = shl i32 1, 10     ; yields {i32}: 1024
+      <result> = shl i32 1, 32     ; undefined
+      <result> = shl <2 x i32> < i32 1, i32 1>, < i32 1, i32 2>   ; yields: result=<2 x i32> < i32 2, i32 4>
+
+'``lshr``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = lshr <ty> <op1>, <op2>         ; yields {ty}:result
+      <result> = lshr exact <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``lshr``' instruction (logical shift right) returns the first
+operand shifted to the right a specified number of bits with zero fill.
+
+Arguments:
+""""""""""
+
+Both arguments to the '``lshr``' instruction must be the same
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer type.
+'``op2``' is treated as an unsigned value.
+
+Semantics:
+""""""""""
+
+This instruction always performs a logical shift right operation. The
+most significant bits of the result will be filled with zero bits after
+the shift. If ``op2`` is (statically or dynamically) equal to or larger
+than the number of bits in ``op1``, the result is undefined. If the
+arguments are vectors, each vector element of ``op1`` is shifted by the
+corresponding shift amount in ``op2``.
+
+If the ``exact`` keyword is present, the result value of the ``lshr`` is
+a :ref:`poison value <poisonvalues>` if any of the bits shifted out are
+non-zero.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = lshr i32 4, 1   ; yields {i32}:result = 2
+      <result> = lshr i32 4, 2   ; yields {i32}:result = 1
+      <result> = lshr i8  4, 3   ; yields {i8}:result = 0
+      <result> = lshr i8 -2, 1   ; yields {i8}:result = 0x7FFFFFFF 
+      <result> = lshr i32 1, 32  ; undefined
+      <result> = lshr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 2>   ; yields: result=<2 x i32> < i32 0x7FFFFFFF, i32 1>
+
+'``ashr``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = ashr <ty> <op1>, <op2>         ; yields {ty}:result
+      <result> = ashr exact <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``ashr``' instruction (arithmetic shift right) returns the first
+operand shifted to the right a specified number of bits with sign
+extension.
+
+Arguments:
+""""""""""
+
+Both arguments to the '``ashr``' instruction must be the same
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer type.
+'``op2``' is treated as an unsigned value.
+
+Semantics:
+""""""""""
+
+This instruction always performs an arithmetic shift right operation,
+The most significant bits of the result will be filled with the sign bit
+of ``op1``. If ``op2`` is (statically or dynamically) equal to or larger
+than the number of bits in ``op1``, the result is undefined. If the
+arguments are vectors, each vector element of ``op1`` is shifted by the
+corresponding shift amount in ``op2``.
+
+If the ``exact`` keyword is present, the result value of the ``ashr`` is
+a :ref:`poison value <poisonvalues>` if any of the bits shifted out are
+non-zero.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = ashr i32 4, 1   ; yields {i32}:result = 2
+      <result> = ashr i32 4, 2   ; yields {i32}:result = 1
+      <result> = ashr i8  4, 3   ; yields {i8}:result = 0
+      <result> = ashr i8 -2, 1   ; yields {i8}:result = -1
+      <result> = ashr i32 1, 32  ; undefined
+      <result> = ashr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 3>   ; yields: result=<2 x i32> < i32 -1, i32 0>
+
+'``and``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = and <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``and``' instruction returns the bitwise logical and of its two
+operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``and``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The truth table used for the '``and``' instruction is:
+
++-----+-----+-----+
+| In0 | In1 | Out |
++-----+-----+-----+
+|   0 |   0 |   0 |
++-----+-----+-----+
+|   0 |   1 |   0 |
++-----+-----+-----+
+|   1 |   0 |   0 |
++-----+-----+-----+
+|   1 |   1 |   1 |
++-----+-----+-----+
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = and i32 4, %var         ; yields {i32}:result = 4 & %var
+      <result> = and i32 15, 40          ; yields {i32}:result = 8
+      <result> = and i32 4, 8            ; yields {i32}:result = 0
+
+'``or``' Instruction
+^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = or <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``or``' instruction returns the bitwise logical inclusive or of its
+two operands.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``or``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The truth table used for the '``or``' instruction is:
+
++-----+-----+-----+
+| In0 | In1 | Out |
++-----+-----+-----+
+|   0 |   0 |   0 |
++-----+-----+-----+
+|   0 |   1 |   1 |
++-----+-----+-----+
+|   1 |   0 |   1 |
++-----+-----+-----+
+|   1 |   1 |   1 |
++-----+-----+-----+
+
+Example:
+""""""""
+
+::
+
+      <result> = or i32 4, %var         ; yields {i32}:result = 4 | %var
+      <result> = or i32 15, 40          ; yields {i32}:result = 47
+      <result> = or i32 4, 8            ; yields {i32}:result = 12
+
+'``xor``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = xor <ty> <op1>, <op2>   ; yields {ty}:result
+
+Overview:
+"""""""""
+
+The '``xor``' instruction returns the bitwise logical exclusive or of
+its two operands. The ``xor`` is used to implement the "one's
+complement" operation, which is the "~" operator in C.
+
+Arguments:
+""""""""""
+
+The two arguments to the '``xor``' instruction must be
+:ref:`integer <t_integer>` or :ref:`vector <t_vector>` of integer values. Both
+arguments must have identical types.
+
+Semantics:
+""""""""""
+
+The truth table used for the '``xor``' instruction is:
+
++-----+-----+-----+
+| In0 | In1 | Out |
++-----+-----+-----+
+|   0 |   0 |   0 |
++-----+-----+-----+
+|   0 |   1 |   1 |
++-----+-----+-----+
+|   1 |   0 |   1 |
++-----+-----+-----+
+|   1 |   1 |   0 |
++-----+-----+-----+
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = xor i32 4, %var         ; yields {i32}:result = 4 ^ %var
+      <result> = xor i32 15, 40          ; yields {i32}:result = 39
+      <result> = xor i32 4, 8            ; yields {i32}:result = 12
+      <result> = xor i32 %V, -1          ; yields {i32}:result = ~%V
+
+Vector Operations
+-----------------
+
+LLVM supports several instructions to represent vector operations in a
+target-independent manner. These instructions cover the element-access
+and vector-specific operations needed to process vectors effectively.
+While LLVM does directly support these vector operations, many
+sophisticated algorithms will want to use target-specific intrinsics to
+take full advantage of a specific target.
+
+.. _i_extractelement:
+
+'``extractelement``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = extractelement <n x <ty>> <val>, i32 <idx>    ; yields <ty>
+
+Overview:
+"""""""""
+
+The '``extractelement``' instruction extracts a single scalar element
+from a vector at a specified index.
+
+Arguments:
+""""""""""
+
+The first operand of an '``extractelement``' instruction is a value of
+:ref:`vector <t_vector>` type. The second operand is an index indicating
+the position from which to extract the element. The index may be a
+variable.
+
+Semantics:
+""""""""""
+
+The result is a scalar of the same type as the element type of ``val``.
+Its value is the value at position ``idx`` of ``val``. If ``idx``
+exceeds the length of ``val``, the results are undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = extractelement <4 x i32> %vec, i32 0    ; yields i32
+
+.. _i_insertelement:
+
+'``insertelement``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = insertelement <n x <ty>> <val>, <ty> <elt>, i32 <idx>    ; yields <n x <ty>>
+
+Overview:
+"""""""""
+
+The '``insertelement``' instruction inserts a scalar element into a
+vector at a specified index.
+
+Arguments:
+""""""""""
+
+The first operand of an '``insertelement``' instruction is a value of
+:ref:`vector <t_vector>` type. The second operand is a scalar value whose
+type must equal the element type of the first operand. The third operand
+is an index indicating the position at which to insert the value. The
+index may be a variable.
+
+Semantics:
+""""""""""
+
+The result is a vector of the same type as ``val``. Its element values
+are those of ``val`` except at position ``idx``, where it gets the value
+``elt``. If ``idx`` exceeds the length of ``val``, the results are
+undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = insertelement <4 x i32> %vec, i32 1, i32 0    ; yields <4 x i32>
+
+.. _i_shufflevector:
+
+'``shufflevector``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask>    ; yields <m x <ty>>
+
+Overview:
+"""""""""
+
+The '``shufflevector``' instruction constructs a permutation of elements
+from two input vectors, returning a vector with the same element type as
+the input and length that is the same as the shuffle mask.
+
+Arguments:
+""""""""""
+
+The first two operands of a '``shufflevector``' instruction are vectors
+with the same type. The third argument is a shuffle mask whose element
+type is always 'i32'. The result of the instruction is a vector whose
+length is the same as the shuffle mask and whose element type is the
+same as the element type of the first two operands.
+
+The shuffle mask operand is required to be a constant vector with either
+constant integer or undef values.
+
+Semantics:
+""""""""""
+
+The elements of the two input vectors are numbered from left to right
+across both of the vectors. The shuffle mask operand specifies, for each
+element of the result vector, which element of the two input vectors the
+result element gets. The element selector may be undef (meaning "don't
+care") and the second operand may be undef if performing a shuffle from
+only one vector.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
+                              <4 x i32> <i32 0, i32 4, i32 1, i32 5>  ; yields <4 x i32>
+      <result> = shufflevector <4 x i32> %v1, <4 x i32> undef,
+                              <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; yields <4 x i32> - Identity shuffle.
+      <result> = shufflevector <8 x i32> %v1, <8 x i32> undef,
+                              <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; yields <4 x i32>
+      <result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
+                              <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >  ; yields <8 x i32>
+
+Aggregate Operations
+--------------------
+
+LLVM supports several instructions for working with
+:ref:`aggregate <t_aggregate>` values.
+
+.. _i_extractvalue:
+
+'``extractvalue``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = extractvalue <aggregate type> <val>, <idx>{, <idx>}*
+
+Overview:
+"""""""""
+
+The '``extractvalue``' instruction extracts the value of a member field
+from an :ref:`aggregate <t_aggregate>` value.
+
+Arguments:
+""""""""""
+
+The first operand of an '``extractvalue``' instruction is a value of
+:ref:`struct <t_struct>` or :ref:`array <t_array>` type. The operands are
+constant indices to specify which value to extract in a similar manner
+as indices in a '``getelementptr``' instruction.
+
+The major differences to ``getelementptr`` indexing are:
+
+-  Since the value being indexed is not a pointer, the first index is
+   omitted and assumed to be zero.
+-  At least one index must be specified.
+-  Not only struct indices but also array indices must be in bounds.
+
+Semantics:
+""""""""""
+
+The result is the value at the position in the aggregate specified by
+the index operands.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = extractvalue {i32, float} %agg, 0    ; yields i32
+
+.. _i_insertvalue:
+
+'``insertvalue``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = insertvalue <aggregate type> <val>, <ty> <elt>, <idx>{, <idx>}*    ; yields <aggregate type>
+
+Overview:
+"""""""""
+
+The '``insertvalue``' instruction inserts a value into a member field in
+an :ref:`aggregate <t_aggregate>` value.
+
+Arguments:
+""""""""""
+
+The first operand of an '``insertvalue``' instruction is a value of
+:ref:`struct <t_struct>` or :ref:`array <t_array>` type. The second operand is
+a first-class value to insert. The following operands are constant
+indices indicating the position at which to insert the value in a
+similar manner as indices in a '``extractvalue``' instruction. The value
+to insert must have the same type as the value identified by the
+indices.
+
+Semantics:
+""""""""""
+
+The result is an aggregate of the same type as ``val``. Its value is
+that of ``val`` except that the value at the position specified by the
+indices is that of ``elt``.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %agg1 = insertvalue {i32, float} undef, i32 1, 0              ; yields {i32 1, float undef}
+      %agg2 = insertvalue {i32, float} %agg1, float %val, 1         ; yields {i32 1, float %val}
+      %agg3 = insertvalue {i32, {float}} %agg1, float %val, 1, 0    ; yields {i32 1, float %val}
+
+.. _memoryops:
+
+Memory Access and Addressing Operations
+---------------------------------------
+
+A key design point of an SSA-based representation is how it represents
+memory. In LLVM, no memory locations are in SSA form, which makes things
+very simple. This section describes how to read, write, and allocate
+memory in LLVM.
+
+.. _i_alloca:
+
+'``alloca``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = alloca <type>[, <ty> <NumElements>][, align <alignment>]     ; yields {type*}:result
+
+Overview:
+"""""""""
+
+The '``alloca``' instruction allocates memory on the stack frame of the
+currently executing function, to be automatically released when this
+function returns to its caller. The object is always allocated in the
+generic address space (address space zero).
+
+Arguments:
+""""""""""
+
+The '``alloca``' instruction allocates ``sizeof(<type>)*NumElements``
+bytes of memory on the runtime stack, returning a pointer of the
+appropriate type to the program. If "NumElements" is specified, it is
+the number of elements allocated, otherwise "NumElements" is defaulted
+to be one. If a constant alignment is specified, the value result of the
+allocation is guaranteed to be aligned to at least that boundary. If not
+specified, or if zero, the target can choose to align the allocation on
+any convenient boundary compatible with the type.
+
+'``type``' may be any sized type.
+
+Semantics:
+""""""""""
+
+Memory is allocated; a pointer is returned. The operation is undefined
+if there is insufficient stack space for the allocation. '``alloca``'d
+memory is automatically released when the function returns. The
+'``alloca``' instruction is commonly used to represent automatic
+variables that must have an address available. When the function returns
+(either with the ``ret`` or ``resume`` instructions), the memory is
+reclaimed. Allocating zero bytes is legal, but the result is undefined.
+The order in which memory is allocated (ie., which way the stack grows)
+is not specified.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %ptr = alloca i32                             ; yields {i32*}:ptr
+      %ptr = alloca i32, i32 4                      ; yields {i32*}:ptr
+      %ptr = alloca i32, i32 4, align 1024          ; yields {i32*}:ptr
+      %ptr = alloca i32, align 1024                 ; yields {i32*}:ptr
+
+.. _i_load:
+
+'``load``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = load [volatile] <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>][, !invariant.load !<index>]
+      <result> = load atomic [volatile] <ty>* <pointer> [singlethread] <ordering>, align <alignment>
+      !<index> = !{ i32 1 }
+
+Overview:
+"""""""""
+
+The '``load``' instruction is used to read from memory.
+
+Arguments:
+""""""""""
+
+The argument to the '``load``' instruction specifies the memory address
+from which to load. The pointer must point to a :ref:`first
+class <t_firstclass>` type. If the ``load`` is marked as ``volatile``,
+then the optimizer is not allowed to modify the number or order of
+execution of this ``load`` with other :ref:`volatile
+operations <volatile>`.
+
+If the ``load`` is marked as ``atomic``, it takes an extra
+:ref:`ordering <ordering>` and optional ``singlethread`` argument. The
+``release`` and ``acq_rel`` orderings are not valid on ``load``
+instructions. Atomic loads produce :ref:`defined <memmodel>` results
+when they may see multiple atomic stores. The type of the pointee must
+be an integer type whose bit width is a power of two greater than or
+equal to eight and less than or equal to a target-specific size limit.
+``align`` must be explicitly specified on atomic loads, and the load has
+undefined behavior if the alignment is not set to a value which is at
+least the size in bytes of the pointee. ``!nontemporal`` does not have
+any defined semantics for atomic loads.
+
+The optional constant ``align`` argument specifies the alignment of the
+operation (that is, the alignment of the memory address). A value of 0
+or an omitted ``align`` argument means that the operation has the abi
+alignment for the target. It is the responsibility of the code emitter
+to ensure that the alignment information is correct. Overestimating the
+alignment results in undefined behavior. Underestimating the alignment
+may produce less efficient code. An alignment of 1 is always safe.
+
+The optional ``!nontemporal`` metadata must reference a single
+metatadata name <index> corresponding to a metadata node with one
+``i32`` entry of value 1. The existence of the ``!nontemporal``
+metatadata on the instruction tells the optimizer and code generator
+that this load is not expected to be reused in the cache. The code
+generator may select special instructions to save cache bandwidth, such
+as the ``MOVNT`` instruction on x86.
+
+The optional ``!invariant.load`` metadata must reference a single
+metatadata name <index> corresponding to a metadata node with no
+entries. The existence of the ``!invariant.load`` metatadata on the
+instruction tells the optimizer and code generator that this load
+address points to memory which does not change value during program
+execution. The optimizer may then move this load around, for example, by
+hoisting it out of loops using loop invariant code motion.
+
+Semantics:
+""""""""""
+
+The location of memory pointed to is loaded. If the value being loaded
+is of scalar type then the number of bytes read does not exceed the
+minimum number of bytes needed to hold all bits of the type. For
+example, loading an ``i24`` reads at most three bytes. When loading a
+value of a type like ``i20`` with a size that is not an integral number
+of bytes, the result is undefined if the value was not originally
+written using a store of the same type.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %ptr = alloca i32                               ; yields {i32*}:ptr
+      store i32 3, i32* %ptr                          ; yields {void}
+      %val = load i32* %ptr                           ; yields {i32}:val = i32 3
+
+.. _i_store:
+
+'``store``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      store [volatile] <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>]        ; yields {void}
+      store atomic [volatile] <ty> <value>, <ty>* <pointer> [singlethread] <ordering>, align <alignment>  ; yields {void}
+
+Overview:
+"""""""""
+
+The '``store``' instruction is used to write to memory.
+
+Arguments:
+""""""""""
+
+There are two arguments to the '``store``' instruction: a value to store
+and an address at which to store it. The type of the '``<pointer>``'
+operand must be a pointer to the :ref:`first class <t_firstclass>` type of
+the '``<value>``' operand. If the ``store`` is marked as ``volatile``,
+then the optimizer is not allowed to modify the number or order of
+execution of this ``store`` with other :ref:`volatile
+operations <volatile>`.
+
+If the ``store`` is marked as ``atomic``, it takes an extra
+:ref:`ordering <ordering>` and optional ``singlethread`` argument. The
+``acquire`` and ``acq_rel`` orderings aren't valid on ``store``
+instructions. Atomic loads produce :ref:`defined <memmodel>` results
+when they may see multiple atomic stores. The type of the pointee must
+be an integer type whose bit width is a power of two greater than or
+equal to eight and less than or equal to a target-specific size limit.
+``align`` must be explicitly specified on atomic stores, and the store
+has undefined behavior if the alignment is not set to a value which is
+at least the size in bytes of the pointee. ``!nontemporal`` does not
+have any defined semantics for atomic stores.
+
+The optional constant "align" argument specifies the alignment of the
+operation (that is, the alignment of the memory address). A value of 0
+or an omitted "align" argument means that the operation has the abi
+alignment for the target. It is the responsibility of the code emitter
+to ensure that the alignment information is correct. Overestimating the
+alignment results in an undefined behavior. Underestimating the
+alignment may produce less efficient code. An alignment of 1 is always
+safe.
+
+The optional !nontemporal metadata must reference a single metatadata
+name <index> corresponding to a metadata node with one i32 entry of
+value 1. The existence of the !nontemporal metatadata on the instruction
+tells the optimizer and code generator that this load is not expected to
+be reused in the cache. The code generator may select special
+instructions to save cache bandwidth, such as the MOVNT instruction on
+x86.
+
+Semantics:
+""""""""""
+
+The contents of memory are updated to contain '``<value>``' at the
+location specified by the '``<pointer>``' operand. If '``<value>``' is
+of scalar type then the number of bytes written does not exceed the
+minimum number of bytes needed to hold all bits of the type. For
+example, storing an ``i24`` writes at most three bytes. When writing a
+value of a type like ``i20`` with a size that is not an integral number
+of bytes, it is unspecified what happens to the extra bits that do not
+belong to the type, but they will typically be overwritten.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %ptr = alloca i32                               ; yields {i32*}:ptr
+      store i32 3, i32* %ptr                          ; yields {void}
+      %val = load i32* %ptr                           ; yields {i32}:val = i32 3
+
+.. _i_fence:
+
+'``fence``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      fence [singlethread] <ordering>                   ; yields {void}
+
+Overview:
+"""""""""
+
+The '``fence``' instruction is used to introduce happens-before edges
+between operations.
+
+Arguments:
+""""""""""
+
+'``fence``' instructions take an :ref:`ordering <ordering>` argument which
+defines what *synchronizes-with* edges they add. They can only be given
+``acquire``, ``release``, ``acq_rel``, and ``seq_cst`` orderings.
+
+Semantics:
+""""""""""
+
+A fence A which has (at least) ``release`` ordering semantics
+*synchronizes with* a fence B with (at least) ``acquire`` ordering
+semantics if and only if there exist atomic operations X and Y, both
+operating on some atomic object M, such that A is sequenced before X, X
+modifies M (either directly or through some side effect of a sequence
+headed by X), Y is sequenced before B, and Y observes M. This provides a
+*happens-before* dependency between A and B. Rather than an explicit
+``fence``, one (but not both) of the atomic operations X or Y might
+provide a ``release`` or ``acquire`` (resp.) ordering constraint and
+still *synchronize-with* the explicit ``fence`` and establish the
+*happens-before* edge.
+
+A ``fence`` which has ``seq_cst`` ordering, in addition to having both
+``acquire`` and ``release`` semantics specified above, participates in
+the global program order of other ``seq_cst`` operations and/or fences.
+
+The optional ":ref:`singlethread <singlethread>`" argument specifies
+that the fence only synchronizes with other fences in the same thread.
+(This is useful for interacting with signal handlers.)
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      fence acquire                          ; yields {void}
+      fence singlethread seq_cst             ; yields {void}
+
+.. _i_cmpxchg:
+
+'``cmpxchg``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      cmpxchg [volatile] <ty>* <pointer>, <ty> <cmp>, <ty> <new> [singlethread] <ordering>  ; yields {ty}
+
+Overview:
+"""""""""
+
+The '``cmpxchg``' instruction is used to atomically modify memory. It
+loads a value in memory and compares it to a given value. If they are
+equal, it stores a new value into the memory.
+
+Arguments:
+""""""""""
+
+There are three arguments to the '``cmpxchg``' instruction: an address
+to operate on, a value to compare to the value currently be at that
+address, and a new value to place at that address if the compared values
+are equal. The type of '<cmp>' must be an integer type whose bit width
+is a power of two greater than or equal to eight and less than or equal
+to a target-specific size limit. '<cmp>' and '<new>' must have the same
+type, and the type of '<pointer>' must be a pointer to that type. If the
+``cmpxchg`` is marked as ``volatile``, then the optimizer is not allowed
+to modify the number or order of execution of this ``cmpxchg`` with
+other :ref:`volatile operations <volatile>`.
+
+The :ref:`ordering <ordering>` argument specifies how this ``cmpxchg``
+synchronizes with other atomic operations.
+
+The optional "``singlethread``" argument declares that the ``cmpxchg``
+is only atomic with respect to code (usually signal handlers) running in
+the same thread as the ``cmpxchg``. Otherwise the cmpxchg is atomic with
+respect to all other code in the system.
+
+The pointer passed into cmpxchg must have alignment greater than or
+equal to the size in memory of the operand.
+
+Semantics:
+""""""""""
+
+The contents of memory at the location specified by the '``<pointer>``'
+operand is read and compared to '``<cmp>``'; if the read value is the
+equal, '``<new>``' is written. The original value at the location is
+returned.
+
+A successful ``cmpxchg`` is a read-modify-write instruction for the purpose
+of identifying release sequences. A failed ``cmpxchg`` is equivalent to an
+atomic load with an ordering parameter determined by dropping any
+``release`` part of the ``cmpxchg``'s ordering.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+    entry:
+      %orig = atomic load i32* %ptr unordered                   ; yields {i32}
+      br label %loop
+
+    loop:
+      %cmp = phi i32 [ %orig, %entry ], [%old, %loop]
+      %squared = mul i32 %cmp, %cmp
+      %old = cmpxchg i32* %ptr, i32 %cmp, i32 %squared          ; yields {i32}
+      %success = icmp eq i32 %cmp, %old
+      br i1 %success, label %done, label %loop
+
+    done:
+      ...
+
+.. _i_atomicrmw:
+
+'``atomicrmw``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      atomicrmw [volatile] <operation> <ty>* <pointer>, <ty> <value> [singlethread] <ordering>                   ; yields {ty}
+
+Overview:
+"""""""""
+
+The '``atomicrmw``' instruction is used to atomically modify memory.
+
+Arguments:
+""""""""""
+
+There are three arguments to the '``atomicrmw``' instruction: an
+operation to apply, an address whose value to modify, an argument to the
+operation. The operation must be one of the following keywords:
+
+-  xchg
+-  add
+-  sub
+-  and
+-  nand
+-  or
+-  xor
+-  max
+-  min
+-  umax
+-  umin
+
+The type of '<value>' must be an integer type whose bit width is a power
+of two greater than or equal to eight and less than or equal to a
+target-specific size limit. The type of the '``<pointer>``' operand must
+be a pointer to that type. If the ``atomicrmw`` is marked as
+``volatile``, then the optimizer is not allowed to modify the number or
+order of execution of this ``atomicrmw`` with other :ref:`volatile
+operations <volatile>`.
+
+Semantics:
+""""""""""
+
+The contents of memory at the location specified by the '``<pointer>``'
+operand are atomically read, modified, and written back. The original
+value at the location is returned. The modification is specified by the
+operation argument:
+
+-  xchg: ``*ptr = val``
+-  add: ``*ptr = *ptr + val``
+-  sub: ``*ptr = *ptr - val``
+-  and: ``*ptr = *ptr & val``
+-  nand: ``*ptr = ~(*ptr & val)``
+-  or: ``*ptr = *ptr | val``
+-  xor: ``*ptr = *ptr ^ val``
+-  max: ``*ptr = *ptr > val ? *ptr : val`` (using a signed comparison)
+-  min: ``*ptr = *ptr < val ? *ptr : val`` (using a signed comparison)
+-  umax: ``*ptr = *ptr > val ? *ptr : val`` (using an unsigned
+   comparison)
+-  umin: ``*ptr = *ptr < val ? *ptr : val`` (using an unsigned
+   comparison)
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %old = atomicrmw add i32* %ptr, i32 1 acquire                        ; yields {i32}
+
+.. _i_getelementptr:
+
+'``getelementptr``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = getelementptr <pty>* <ptrval>{, <ty> <idx>}*
+      <result> = getelementptr inbounds <pty>* <ptrval>{, <ty> <idx>}*
+      <result> = getelementptr <ptr vector> ptrval, <vector index type> idx
+
+Overview:
+"""""""""
+
+The '``getelementptr``' instruction is used to get the address of a
+subelement of an :ref:`aggregate <t_aggregate>` data structure. It performs
+address calculation only and does not access memory.
+
+Arguments:
+""""""""""
+
+The first argument is always a pointer or a vector of pointers, and
+forms the basis of the calculation. The remaining arguments are indices
+that indicate which of the elements of the aggregate object are indexed.
+The interpretation of each index is dependent on the type being indexed
+into. The first index always indexes the pointer value given as the
+first argument, the second index indexes a value of the type pointed to
+(not necessarily the value directly pointed to, since the first index
+can be non-zero), etc. The first type indexed into must be a pointer
+value, subsequent types can be arrays, vectors, and structs. Note that
+subsequent types being indexed into can never be pointers, since that
+would require loading the pointer before continuing calculation.
+
+The type of each index argument depends on the type it is indexing into.
+When indexing into a (optionally packed) structure, only ``i32`` integer
+**constants** are allowed (when using a vector of indices they must all
+be the **same** ``i32`` integer constant). When indexing into an array,
+pointer or vector, integers of any width are allowed, and they are not
+required to be constant. These integers are treated as signed values
+where relevant.
+
+For example, let's consider a C code fragment and how it gets compiled
+to LLVM:
+
+.. code-block:: c
+
+    struct RT {
+      char A;
+      int B[10][20];
+      char C;
+    };
+    struct ST {
+      int X;
+      double Y;
+      struct RT Z;
+    };
+
+    int *foo(struct ST *s) {
+      return &s[1].Z.B[5][13];
+    }
+
+The LLVM code generated by Clang is:
+
+.. code-block:: llvm
+
+    %struct.RT = type { i8, [10 x [20 x i32]], i8 }
+    %struct.ST = type { i32, double, %struct.RT }
+
+    define i32* @foo(%struct.ST* %s) nounwind uwtable readnone optsize ssp {
+    entry:
+      %arrayidx = getelementptr inbounds %struct.ST* %s, i64 1, i32 2, i32 1, i64 5, i64 13
+      ret i32* %arrayidx
+    }
+
+Semantics:
+""""""""""
+
+In the example above, the first index is indexing into the
+'``%struct.ST*``' type, which is a pointer, yielding a '``%struct.ST``'
+= '``{ i32, double, %struct.RT }``' type, a structure. The second index
+indexes into the third element of the structure, yielding a
+'``%struct.RT``' = '``{ i8 , [10 x [20 x i32]], i8 }``' type, another
+structure. The third index indexes into the second element of the
+structure, yielding a '``[10 x [20 x i32]]``' type, an array. The two
+dimensions of the array are subscripted into, yielding an '``i32``'
+type. The '``getelementptr``' instruction returns a pointer to this
+element, thus computing a value of '``i32*``' type.
+
+Note that it is perfectly legal to index partially through a structure,
+returning a pointer to an inner element. Because of this, the LLVM code
+for the given testcase is equivalent to:
+
+.. code-block:: llvm
+
+    define i32* @foo(%struct.ST* %s) {
+      %t1 = getelementptr %struct.ST* %s, i32 1                 ; yields %struct.ST*:%t1
+      %t2 = getelementptr %struct.ST* %t1, i32 0, i32 2         ; yields %struct.RT*:%t2
+      %t3 = getelementptr %struct.RT* %t2, i32 0, i32 1         ; yields [10 x [20 x i32]]*:%t3
+      %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5  ; yields [20 x i32]*:%t4
+      %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13        ; yields i32*:%t5
+      ret i32* %t5
+    }
+
+If the ``inbounds`` keyword is present, the result value of the
+``getelementptr`` is a :ref:`poison value <poisonvalues>` if the base
+pointer is not an *in bounds* address of an allocated object, or if any
+of the addresses that would be formed by successive addition of the
+offsets implied by the indices to the base address with infinitely
+precise signed arithmetic are not an *in bounds* address of that
+allocated object. The *in bounds* addresses for an allocated object are
+all the addresses that point into the object, plus the address one byte
+past the end. In cases where the base is a vector of pointers the
+``inbounds`` keyword applies to each of the computations element-wise.
+
+If the ``inbounds`` keyword is not present, the offsets are added to the
+base address with silently-wrapping two's complement arithmetic. If the
+offsets have a different width from the pointer, they are sign-extended
+or truncated to the width of the pointer. The result value of the
+``getelementptr`` may be outside the object pointed to by the base
+pointer. The result value may not necessarily be used to access memory
+though, even if it happens to point into allocated storage. See the
+:ref:`Pointer Aliasing Rules <pointeraliasing>` section for more
+information.
+
+The getelementptr instruction is often confusing. For some more insight
+into how it works, see :doc:`the getelementptr FAQ <GetElementPtr>`.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+        ; yields [12 x i8]*:aptr
+        %aptr = getelementptr {i32, [12 x i8]}* %saptr, i64 0, i32 1
+        ; yields i8*:vptr
+        %vptr = getelementptr {i32, <2 x i8>}* %svptr, i64 0, i32 1, i32 1
+        ; yields i8*:eptr
+        %eptr = getelementptr [12 x i8]* %aptr, i64 0, i32 1
+        ; yields i32*:iptr
+        %iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0
+
+In cases where the pointer argument is a vector of pointers, each index
+must be a vector with the same number of elements. For example:
+
+.. code-block:: llvm
+
+     %A = getelementptr <4 x i8*> %ptrs, <4 x i64> %offsets,
+
+Conversion Operations
+---------------------
+
+The instructions in this category are the conversion instructions
+(casting) which all take a single operand and a type. They perform
+various bit conversions on the operand.
+
+'``trunc .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = trunc <ty> <value> to <ty2>             ; yields ty2
+
+Overview:
+"""""""""
+
+The '``trunc``' instruction truncates its operand to the type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``trunc``' instruction takes a value to trunc, and a type to trunc
+it to. Both types must be of :ref:`integer <t_integer>` types, or vectors
+of the same number of integers. The bit size of the ``value`` must be
+larger than the bit size of the destination type, ``ty2``. Equal sized
+types are not allowed.
+
+Semantics:
+""""""""""
+
+The '``trunc``' instruction truncates the high order bits in ``value``
+and converts the remaining bits to ``ty2``. Since the source size must
+be larger than the destination size, ``trunc`` cannot be a *no-op cast*.
+It will always truncate bits.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = trunc i32 257 to i8                        ; yields i8:1
+      %Y = trunc i32 123 to i1                        ; yields i1:true
+      %Z = trunc i32 122 to i1                        ; yields i1:false
+      %W = trunc <2 x i16> <i16 8, i16 7> to <2 x i8> ; yields <i8 8, i8 7>
+
+'``zext .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = zext <ty> <value> to <ty2>             ; yields ty2
+
+Overview:
+"""""""""
+
+The '``zext``' instruction zero extends its operand to type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``zext``' instruction takes a value to cast, and a type to cast it
+to. Both types must be of :ref:`integer <t_integer>` types, or vectors of
+the same number of integers. The bit size of the ``value`` must be
+smaller than the bit size of the destination type, ``ty2``.
+
+Semantics:
+""""""""""
+
+The ``zext`` fills the high order bits of the ``value`` with zero bits
+until it reaches the size of the destination type, ``ty2``.
+
+When zero extending from i1, the result will always be either 0 or 1.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = zext i32 257 to i64              ; yields i64:257
+      %Y = zext i1 true to i32              ; yields i32:1
+      %Z = zext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
+
+'``sext .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = sext <ty> <value> to <ty2>             ; yields ty2
+
+Overview:
+"""""""""
+
+The '``sext``' sign extends ``value`` to the type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``sext``' instruction takes a value to cast, and a type to cast it
+to. Both types must be of :ref:`integer <t_integer>` types, or vectors of
+the same number of integers. The bit size of the ``value`` must be
+smaller than the bit size of the destination type, ``ty2``.
+
+Semantics:
+""""""""""
+
+The '``sext``' instruction performs a sign extension by copying the sign
+bit (highest order bit) of the ``value`` until it reaches the bit size
+of the type ``ty2``.
+
+When sign extending from i1, the extension always results in -1 or 0.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = sext i8  -1 to i16              ; yields i16   :65535
+      %Y = sext i1 true to i32             ; yields i32:-1
+      %Z = sext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
+
+'``fptrunc .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = fptrunc <ty> <value> to <ty2>             ; yields ty2
+
+Overview:
+"""""""""
+
+The '``fptrunc``' instruction truncates ``value`` to type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``fptrunc``' instruction takes a :ref:`floating point <t_floating>`
+value to cast and a :ref:`floating point <t_floating>` type to cast it to.
+The size of ``value`` must be larger than the size of ``ty2``. This
+implies that ``fptrunc`` cannot be used to make a *no-op cast*.
+
+Semantics:
+""""""""""
+
+The '``fptrunc``' instruction truncates a ``value`` from a larger
+:ref:`floating point <t_floating>` type to a smaller :ref:`floating
+point <t_floating>` type. If the value cannot fit within the
+destination type, ``ty2``, then the results are undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = fptrunc double 123.0 to float         ; yields float:123.0
+      %Y = fptrunc double 1.0E+300 to float      ; yields undefined
+
+'``fpext .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = fpext <ty> <value> to <ty2>             ; yields ty2
+
+Overview:
+"""""""""
+
+The '``fpext``' extends a floating point ``value`` to a larger floating
+point value.
+
+Arguments:
+""""""""""
+
+The '``fpext``' instruction takes a :ref:`floating point <t_floating>`
+``value`` to cast, and a :ref:`floating point <t_floating>` type to cast it
+to. The source type must be smaller than the destination type.
+
+Semantics:
+""""""""""
+
+The '``fpext``' instruction extends the ``value`` from a smaller
+:ref:`floating point <t_floating>` type to a larger :ref:`floating
+point <t_floating>` type. The ``fpext`` cannot be used to make a
+*no-op cast* because it always changes bits. Use ``bitcast`` to make a
+*no-op cast* for a floating point cast.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = fpext float 3.125 to double         ; yields double:3.125000e+00
+      %Y = fpext double %X to fp128            ; yields fp128:0xL00000000000000004000900000000000
+
+'``fptoui .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = fptoui <ty> <value> to <ty2>             ; yields ty2
+
+Overview:
+"""""""""
+
+The '``fptoui``' converts a floating point ``value`` to its unsigned
+integer equivalent of type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``fptoui``' instruction takes a value to cast, which must be a
+scalar or vector :ref:`floating point <t_floating>` value, and a type to
+cast it to ``ty2``, which must be an :ref:`integer <t_integer>` type. If
+``ty`` is a vector floating point type, ``ty2`` must be a vector integer
+type with the same number of elements as ``ty``
+
+Semantics:
+""""""""""
+
+The '``fptoui``' instruction converts its :ref:`floating
+point <t_floating>` operand into the nearest (rounding towards zero)
+unsigned integer value. If the value cannot fit in ``ty2``, the results
+are undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = fptoui double 123.0 to i32      ; yields i32:123
+      %Y = fptoui float 1.0E+300 to i1     ; yields undefined:1
+      %Z = fptoui float 1.04E+17 to i8     ; yields undefined:1
+
+'``fptosi .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = fptosi <ty> <value> to <ty2>             ; yields ty2
+
+Overview:
+"""""""""
+
+The '``fptosi``' instruction converts :ref:`floating point <t_floating>`
+``value`` to type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``fptosi``' instruction takes a value to cast, which must be a
+scalar or vector :ref:`floating point <t_floating>` value, and a type to
+cast it to ``ty2``, which must be an :ref:`integer <t_integer>` type. If
+``ty`` is a vector floating point type, ``ty2`` must be a vector integer
+type with the same number of elements as ``ty``
+
+Semantics:
+""""""""""
+
+The '``fptosi``' instruction converts its :ref:`floating
+point <t_floating>` operand into the nearest (rounding towards zero)
+signed integer value. If the value cannot fit in ``ty2``, the results
+are undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = fptosi double -123.0 to i32      ; yields i32:-123
+      %Y = fptosi float 1.0E-247 to i1      ; yields undefined:1
+      %Z = fptosi float 1.04E+17 to i8      ; yields undefined:1
+
+'``uitofp .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = uitofp <ty> <value> to <ty2>             ; yields ty2
+
+Overview:
+"""""""""
+
+The '``uitofp``' instruction regards ``value`` as an unsigned integer
+and converts that value to the ``ty2`` type.
+
+Arguments:
+""""""""""
+
+The '``uitofp``' instruction takes a value to cast, which must be a
+scalar or vector :ref:`integer <t_integer>` value, and a type to cast it to
+``ty2``, which must be an :ref:`floating point <t_floating>` type. If
+``ty`` is a vector integer type, ``ty2`` must be a vector floating point
+type with the same number of elements as ``ty``
+
+Semantics:
+""""""""""
+
+The '``uitofp``' instruction interprets its operand as an unsigned
+integer quantity and converts it to the corresponding floating point
+value. If the value cannot fit in the floating point value, the results
+are undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = uitofp i32 257 to float         ; yields float:257.0
+      %Y = uitofp i8 -1 to double          ; yields double:255.0
+
+'``sitofp .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = sitofp <ty> <value> to <ty2>             ; yields ty2
+
+Overview:
+"""""""""
+
+The '``sitofp``' instruction regards ``value`` as a signed integer and
+converts that value to the ``ty2`` type.
+
+Arguments:
+""""""""""
+
+The '``sitofp``' instruction takes a value to cast, which must be a
+scalar or vector :ref:`integer <t_integer>` value, and a type to cast it to
+``ty2``, which must be an :ref:`floating point <t_floating>` type. If
+``ty`` is a vector integer type, ``ty2`` must be a vector floating point
+type with the same number of elements as ``ty``
+
+Semantics:
+""""""""""
+
+The '``sitofp``' instruction interprets its operand as a signed integer
+quantity and converts it to the corresponding floating point value. If
+the value cannot fit in the floating point value, the results are
+undefined.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = sitofp i32 257 to float         ; yields float:257.0
+      %Y = sitofp i8 -1 to double          ; yields double:-1.0
+
+.. _i_ptrtoint:
+
+'``ptrtoint .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = ptrtoint <ty> <value> to <ty2>             ; yields ty2
+
+Overview:
+"""""""""
+
+The '``ptrtoint``' instruction converts the pointer or a vector of
+pointers ``value`` to the integer (or vector of integers) type ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``ptrtoint``' instruction takes a ``value`` to cast, which must be
+a a value of type :ref:`pointer <t_pointer>` or a vector of pointers, and a
+type to cast it to ``ty2``, which must be an :ref:`integer <t_integer>` or
+a vector of integers type.
+
+Semantics:
+""""""""""
+
+The '``ptrtoint``' instruction converts ``value`` to integer type
+``ty2`` by interpreting the pointer value as an integer and either
+truncating or zero extending that value to the size of the integer type.
+If ``value`` is smaller than ``ty2`` then a zero extension is done. If
+``value`` is larger than ``ty2`` then a truncation is done. If they are
+the same size, then nothing is done (*no-op cast*) other than a type
+change.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = ptrtoint i32* %P to i8                         ; yields truncation on 32-bit architecture
+      %Y = ptrtoint i32* %P to i64                        ; yields zero extension on 32-bit architecture
+      %Z = ptrtoint <4 x i32*> %P to <4 x i64>; yields vector zero extension for a vector of addresses on 32-bit architecture
+
+.. _i_inttoptr:
+
+'``inttoptr .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = inttoptr <ty> <value> to <ty2>             ; yields ty2
+
+Overview:
+"""""""""
+
+The '``inttoptr``' instruction converts an integer ``value`` to a
+pointer type, ``ty2``.
+
+Arguments:
+""""""""""
+
+The '``inttoptr``' instruction takes an :ref:`integer <t_integer>` value to
+cast, and a type to cast it to, which must be a :ref:`pointer <t_pointer>`
+type.
+
+Semantics:
+""""""""""
+
+The '``inttoptr``' instruction converts ``value`` to type ``ty2`` by
+applying either a zero extension or a truncation depending on the size
+of the integer ``value``. If ``value`` is larger than the size of a
+pointer then a truncation is done. If ``value`` is smaller than the size
+of a pointer then a zero extension is done. If they are the same size,
+nothing is done (*no-op cast*).
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = inttoptr i32 255 to i32*          ; yields zero extension on 64-bit architecture
+      %Y = inttoptr i32 255 to i32*          ; yields no-op on 32-bit architecture
+      %Z = inttoptr i64 0 to i32*            ; yields truncation on 32-bit architecture
+      %Z = inttoptr <4 x i32> %G to <4 x i8*>; yields truncation of vector G to four pointers
+
+.. _i_bitcast:
+
+'``bitcast .. to``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = bitcast <ty> <value> to <ty2>             ; yields ty2
+
+Overview:
+"""""""""
+
+The '``bitcast``' instruction converts ``value`` to type ``ty2`` without
+changing any bits.
+
+Arguments:
+""""""""""
+
+The '``bitcast``' instruction takes a value to cast, which must be a
+non-aggregate first class value, and a type to cast it to, which must
+also be a non-aggregate :ref:`first class <t_firstclass>` type. The bit
+sizes of ``value`` and the destination type, ``ty2``, must be identical.
+If the source type is a pointer, the destination type must also be a
+pointer. This instruction supports bitwise conversion of vectors to
+integers and to vectors of other types (as long as they have the same
+size).
+
+Semantics:
+""""""""""
+
+The '``bitcast``' instruction converts ``value`` to type ``ty2``. It is
+always a *no-op cast* because no bits change with this conversion. The
+conversion is done as if the ``value`` had been stored to memory and
+read back as type ``ty2``. Pointer (or vector of pointers) types may
+only be converted to other pointer (or vector of pointers) types with
+this instruction. To convert pointers to other types, use the
+:ref:`inttoptr <i_inttoptr>` or :ref:`ptrtoint <i_ptrtoint>` instructions
+first.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = bitcast i8 255 to i8              ; yields i8 :-1
+      %Y = bitcast i32* %x to sint*          ; yields sint*:%x
+      %Z = bitcast <2 x int> %V to i64;        ; yields i64: %V
+      %Z = bitcast <2 x i32*> %V to <2 x i64*> ; yields <2 x i64*>
+
+.. _otherops:
+
+Other Operations
+----------------
+
+The instructions in this category are the "miscellaneous" instructions,
+which defy better classification.
+
+.. _i_icmp:
+
+'``icmp``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = icmp <cond> <ty> <op1>, <op2>   ; yields {i1} or {<N x i1>}:result
+
+Overview:
+"""""""""
+
+The '``icmp``' instruction returns a boolean value or a vector of
+boolean values based on comparison of its two integer, integer vector,
+pointer, or pointer vector operands.
+
+Arguments:
+""""""""""
+
+The '``icmp``' instruction takes three operands. The first operand is
+the condition code indicating the kind of comparison to perform. It is
+not a value, just a keyword. The possible condition code are:
+
+#. ``eq``: equal
+#. ``ne``: not equal
+#. ``ugt``: unsigned greater than
+#. ``uge``: unsigned greater or equal
+#. ``ult``: unsigned less than
+#. ``ule``: unsigned less or equal
+#. ``sgt``: signed greater than
+#. ``sge``: signed greater or equal
+#. ``slt``: signed less than
+#. ``sle``: signed less or equal
+
+The remaining two arguments must be :ref:`integer <t_integer>` or
+:ref:`pointer <t_pointer>` or integer :ref:`vector <t_vector>` typed. They
+must also be identical types.
+
+Semantics:
+""""""""""
+
+The '``icmp``' compares ``op1`` and ``op2`` according to the condition
+code given as ``cond``. The comparison performed always yields either an
+:ref:`i1 <t_integer>` or vector of ``i1`` result, as follows:
+
+#. ``eq``: yields ``true`` if the operands are equal, ``false``
+   otherwise. No sign interpretation is necessary or performed.
+#. ``ne``: yields ``true`` if the operands are unequal, ``false``
+   otherwise. No sign interpretation is necessary or performed.
+#. ``ugt``: interprets the operands as unsigned values and yields
+   ``true`` if ``op1`` is greater than ``op2``.
+#. ``uge``: interprets the operands as unsigned values and yields
+   ``true`` if ``op1`` is greater than or equal to ``op2``.
+#. ``ult``: interprets the operands as unsigned values and yields
+   ``true`` if ``op1`` is less than ``op2``.
+#. ``ule``: interprets the operands as unsigned values and yields
+   ``true`` if ``op1`` is less than or equal to ``op2``.
+#. ``sgt``: interprets the operands as signed values and yields ``true``
+   if ``op1`` is greater than ``op2``.
+#. ``sge``: interprets the operands as signed values and yields ``true``
+   if ``op1`` is greater than or equal to ``op2``.
+#. ``slt``: interprets the operands as signed values and yields ``true``
+   if ``op1`` is less than ``op2``.
+#. ``sle``: interprets the operands as signed values and yields ``true``
+   if ``op1`` is less than or equal to ``op2``.
+
+If the operands are :ref:`pointer <t_pointer>` typed, the pointer values
+are compared as if they were integers.
+
+If the operands are integer vectors, then they are compared element by
+element. The result is an ``i1`` vector with the same number of elements
+as the values being compared. Otherwise, the result is an ``i1``.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = icmp eq i32 4, 5          ; yields: result=false
+      <result> = icmp ne float* %X, %X     ; yields: result=false
+      <result> = icmp ult i16  4, 5        ; yields: result=true
+      <result> = icmp sgt i16  4, 5        ; yields: result=false
+      <result> = icmp ule i16 -4, 5        ; yields: result=false
+      <result> = icmp sge i16  4, 5        ; yields: result=false
+
+Note that the code generator does not yet support vector types with the
+``icmp`` instruction.
+
+.. _i_fcmp:
+
+'``fcmp``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = fcmp <cond> <ty> <op1>, <op2>     ; yields {i1} or {<N x i1>}:result
+
+Overview:
+"""""""""
+
+The '``fcmp``' instruction returns a boolean value or vector of boolean
+values based on comparison of its operands.
+
+If the operands are floating point scalars, then the result type is a
+boolean (:ref:`i1 <t_integer>`).
+
+If the operands are floating point vectors, then the result type is a
+vector of boolean with the same number of elements as the operands being
+compared.
+
+Arguments:
+""""""""""
+
+The '``fcmp``' instruction takes three operands. The first operand is
+the condition code indicating the kind of comparison to perform. It is
+not a value, just a keyword. The possible condition code are:
+
+#. ``false``: no comparison, always returns false
+#. ``oeq``: ordered and equal
+#. ``ogt``: ordered and greater than
+#. ``oge``: ordered and greater than or equal
+#. ``olt``: ordered and less than
+#. ``ole``: ordered and less than or equal
+#. ``one``: ordered and not equal
+#. ``ord``: ordered (no nans)
+#. ``ueq``: unordered or equal
+#. ``ugt``: unordered or greater than
+#. ``uge``: unordered or greater than or equal
+#. ``ult``: unordered or less than
+#. ``ule``: unordered or less than or equal
+#. ``une``: unordered or not equal
+#. ``uno``: unordered (either nans)
+#. ``true``: no comparison, always returns true
+
+*Ordered* means that neither operand is a QNAN while *unordered* means
+that either operand may be a QNAN.
+
+Each of ``val1`` and ``val2`` arguments must be either a :ref:`floating
+point <t_floating>` type or a :ref:`vector <t_vector>` of floating point
+type. They must have identical types.
+
+Semantics:
+""""""""""
+
+The '``fcmp``' instruction compares ``op1`` and ``op2`` according to the
+condition code given as ``cond``. If the operands are vectors, then the
+vectors are compared element by element. Each comparison performed
+always yields an :ref:`i1 <t_integer>` result, as follows:
+
+#. ``false``: always yields ``false``, regardless of operands.
+#. ``oeq``: yields ``true`` if both operands are not a QNAN and ``op1``
+   is equal to ``op2``.
+#. ``ogt``: yields ``true`` if both operands are not a QNAN and ``op1``
+   is greater than ``op2``.
+#. ``oge``: yields ``true`` if both operands are not a QNAN and ``op1``
+   is greater than or equal to ``op2``.
+#. ``olt``: yields ``true`` if both operands are not a QNAN and ``op1``
+   is less than ``op2``.
+#. ``ole``: yields ``true`` if both operands are not a QNAN and ``op1``
+   is less than or equal to ``op2``.
+#. ``one``: yields ``true`` if both operands are not a QNAN and ``op1``
+   is not equal to ``op2``.
+#. ``ord``: yields ``true`` if both operands are not a QNAN.
+#. ``ueq``: yields ``true`` if either operand is a QNAN or ``op1`` is
+   equal to ``op2``.
+#. ``ugt``: yields ``true`` if either operand is a QNAN or ``op1`` is
+   greater than ``op2``.
+#. ``uge``: yields ``true`` if either operand is a QNAN or ``op1`` is
+   greater than or equal to ``op2``.
+#. ``ult``: yields ``true`` if either operand is a QNAN or ``op1`` is
+   less than ``op2``.
+#. ``ule``: yields ``true`` if either operand is a QNAN or ``op1`` is
+   less than or equal to ``op2``.
+#. ``une``: yields ``true`` if either operand is a QNAN or ``op1`` is
+   not equal to ``op2``.
+#. ``uno``: yields ``true`` if either operand is a QNAN.
+#. ``true``: always yields ``true``, regardless of operands.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      <result> = fcmp oeq float 4.0, 5.0    ; yields: result=false
+      <result> = fcmp one float 4.0, 5.0    ; yields: result=true
+      <result> = fcmp olt float 4.0, 5.0    ; yields: result=true
+      <result> = fcmp ueq double 1.0, 2.0   ; yields: result=false
+
+Note that the code generator does not yet support vector types with the
+``fcmp`` instruction.
+
+.. _i_phi:
+
+'``phi``' Instruction
+^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = phi <ty> [ <val0>, <label0>], ...
+
+Overview:
+"""""""""
+
+The '``phi``' instruction is used to implement the φ node in the SSA
+graph representing the function.
+
+Arguments:
+""""""""""
+
+The type of the incoming values is specified with the first type field.
+After this, the '``phi``' instruction takes a list of pairs as
+arguments, with one pair for each predecessor basic block of the current
+block. Only values of :ref:`first class <t_firstclass>` type may be used as
+the value arguments to the PHI node. Only labels may be used as the
+label arguments.
+
+There must be no non-phi instructions between the start of a basic block
+and the PHI instructions: i.e. PHI instructions must be first in a basic
+block.
+
+For the purposes of the SSA form, the use of each incoming value is
+deemed to occur on the edge from the corresponding predecessor block to
+the current block (but after any definition of an '``invoke``'
+instruction's return value on the same edge).
+
+Semantics:
+""""""""""
+
+At runtime, the '``phi``' instruction logically takes on the value
+specified by the pair corresponding to the predecessor basic block that
+executed just prior to the current block.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+    Loop:       ; Infinite loop that counts from 0 on up...
+      %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ]
+      %nextindvar = add i32 %indvar, 1
+      br label %Loop
+
+.. _i_select:
+
+'``select``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = select selty <cond>, <ty> <val1>, <ty> <val2>             ; yields ty
+
+      selty is either i1 or {<N x i1>}
+
+Overview:
+"""""""""
+
+The '``select``' instruction is used to choose one value based on a
+condition, without branching.
+
+Arguments:
+""""""""""
+
+The '``select``' instruction requires an 'i1' value or a vector of 'i1'
+values indicating the condition, and two values of the same :ref:`first
+class <t_firstclass>` type. If the val1/val2 are vectors and the
+condition is a scalar, then entire vectors are selected, not individual
+elements.
+
+Semantics:
+""""""""""
+
+If the condition is an i1 and it evaluates to 1, the instruction returns
+the first value argument; otherwise, it returns the second value
+argument.
+
+If the condition is a vector of i1, then the value arguments must be
+vectors of the same size, and the selection is done element by element.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %X = select i1 true, i8 17, i8 42          ; yields i8:17
+
+.. _i_call:
+
+'``call``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <result> = [tail] call [cconv] [ret attrs] <ty> [<fnty>*] <fnptrval>(<function args>) [fn attrs]
+
+Overview:
+"""""""""
+
+The '``call``' instruction represents a simple function call.
+
+Arguments:
+""""""""""
+
+This instruction requires several arguments:
+
+#. The optional "tail" marker indicates that the callee function does
+   not access any allocas or varargs in the caller. Note that calls may
+   be marked "tail" even if they do not occur before a
+   :ref:`ret <i_ret>` instruction. If the "tail" marker is present, the
+   function call is eligible for tail call optimization, but `might not
+   in fact be optimized into a jump <CodeGenerator.html#tailcallopt>`_.
+   The code generator may optimize calls marked "tail" with either 1)
+   automatic `sibling call
+   optimization <CodeGenerator.html#sibcallopt>`_ when the caller and
+   callee have matching signatures, or 2) forced tail call optimization
+   when the following extra requirements are met:
+
+   -  Caller and callee both have the calling convention ``fastcc``.
+   -  The call is in tail position (ret immediately follows call and ret
+      uses value of call or is void).
+   -  Option ``-tailcallopt`` is enabled, or
+      ``llvm::GuaranteedTailCallOpt`` is ``true``.
+   -  `Platform specific constraints are
+      met. <CodeGenerator.html#tailcallopt>`_
+
+#. The optional "cconv" marker indicates which :ref:`calling
+   convention <callingconv>` the call should use. If none is
+   specified, the call defaults to using C calling conventions. The
+   calling convention of the call must match the calling convention of
+   the target function, or else the behavior is undefined.
+#. The optional :ref:`Parameter Attributes <paramattrs>` list for return
+   values. Only '``zeroext``', '``signext``', and '``inreg``' attributes
+   are valid here.
+#. '``ty``': the type of the call instruction itself which is also the
+   type of the return value. Functions that return no value are marked
+   ``void``.
+#. '``fnty``': shall be the signature of the pointer to function value
+   being invoked. The argument types must match the types implied by
+   this signature. This type can be omitted if the function is not
+   varargs and if the function type does not return a pointer to a
+   function.
+#. '``fnptrval``': An LLVM value containing a pointer to a function to
+   be invoked. In most cases, this is a direct function invocation, but
+   indirect ``call``'s are just as possible, calling an arbitrary pointer
+   to function value.
+#. '``function args``': argument list whose types match the function
+   signature argument types and parameter attributes. All arguments must
+   be of :ref:`first class <t_firstclass>` type. If the function signature
+   indicates the function accepts a variable number of arguments, the
+   extra arguments can be specified.
+#. The optional :ref:`function attributes <fnattrs>` list. Only
+   '``noreturn``', '``nounwind``', '``readonly``' and '``readnone``'
+   attributes are valid here.
+
+Semantics:
+""""""""""
+
+The '``call``' instruction is used to cause control flow to transfer to
+a specified function, with its incoming arguments bound to the specified
+values. Upon a '``ret``' instruction in the called function, control
+flow continues with the instruction after the function call, and the
+return value of the function is bound to the result argument.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %retval = call i32 @test(i32 %argc)
+      call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42)        ; yields i32
+      %X = tail call i32 @foo()                                    ; yields i32
+      %Y = tail call fastcc i32 @foo()  ; yields i32
+      call void %foo(i8 97 signext)
+
+      %struct.A = type { i32, i8 }
+      %r = call %struct.A @foo()                        ; yields { 32, i8 }
+      %gr = extractvalue %struct.A %r, 0                ; yields i32
+      %gr1 = extractvalue %struct.A %r, 1               ; yields i8
+      %Z = call void @foo() noreturn                    ; indicates that %foo never returns normally
+      %ZZ = call zeroext i32 @bar()                     ; Return value is %zero extended
+
+llvm treats calls to some functions with names and arguments that match
+the standard C99 library as being the C99 library functions, and may
+perform optimizations or generate code for them under that assumption.
+This is something we'd like to change in the future to provide better
+support for freestanding environments and non-C-based languages.
+
+.. _i_va_arg:
+
+'``va_arg``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <resultval> = va_arg <va_list*> <arglist>, <argty>
+
+Overview:
+"""""""""
+
+The '``va_arg``' instruction is used to access arguments passed through
+the "variable argument" area of a function call. It is used to implement
+the ``va_arg`` macro in C.
+
+Arguments:
+""""""""""
+
+This instruction takes a ``va_list*`` value and the type of the
+argument. It returns a value of the specified argument type and
+increments the ``va_list`` to point to the next argument. The actual
+type of ``va_list`` is target specific.
+
+Semantics:
+""""""""""
+
+The '``va_arg``' instruction loads an argument of the specified type
+from the specified ``va_list`` and causes the ``va_list`` to point to
+the next argument. For more information, see the variable argument
+handling :ref:`Intrinsic Functions <int_varargs>`.
+
+It is legal for this instruction to be called in a function which does
+not take a variable number of arguments, for example, the ``vfprintf``
+function.
+
+``va_arg`` is an LLVM instruction instead of an :ref:`intrinsic
+function <intrinsics>` because it takes a type as an argument.
+
+Example:
+""""""""
+
+See the :ref:`variable argument processing <int_varargs>` section.
+
+Note that the code generator does not yet fully support va\_arg on many
+targets. Also, it does not currently support va\_arg with aggregate
+types on any target.
+
+.. _i_landingpad:
+
+'``landingpad``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      <resultval> = landingpad <resultty> personality <type> <pers_fn> <clause>+
+      <resultval> = landingpad <resultty> personality <type> <pers_fn> cleanup <clause>*
+
+      <clause> := catch <type> <value>
+      <clause> := filter <array constant type> <array constant>
+
+Overview:
+"""""""""
+
+The '``landingpad``' instruction is used by `LLVM's exception handling
+system <ExceptionHandling.html#overview>`_ to specify that a basic block
+is a landing pad — one where the exception lands, and corresponds to the
+code found in the ``catch`` portion of a ``try``/``catch`` sequence. It
+defines values supplied by the personality function (``pers_fn``) upon
+re-entry to the function. The ``resultval`` has the type ``resultty``.
+
+Arguments:
+""""""""""
+
+This instruction takes a ``pers_fn`` value. This is the personality
+function associated with the unwinding mechanism. The optional
+``cleanup`` flag indicates that the landing pad block is a cleanup.
+
+A ``clause`` begins with the clause type — ``catch`` or ``filter`` — and
+contains the global variable representing the "type" that may be caught
+or filtered respectively. Unlike the ``catch`` clause, the ``filter``
+clause takes an array constant as its argument. Use
+"``[0 x i8**] undef``" for a filter which cannot throw. The
+'``landingpad``' instruction must contain *at least* one ``clause`` or
+the ``cleanup`` flag.
+
+Semantics:
+""""""""""
+
+The '``landingpad``' instruction defines the values which are set by the
+personality function (``pers_fn``) upon re-entry to the function, and
+therefore the "result type" of the ``landingpad`` instruction. As with
+calling conventions, how the personality function results are
+represented in LLVM IR is target specific.
+
+The clauses are applied in order from top to bottom. If two
+``landingpad`` instructions are merged together through inlining, the
+clauses from the calling function are appended to the list of clauses.
+When the call stack is being unwound due to an exception being thrown,
+the exception is compared against each ``clause`` in turn. If it doesn't
+match any of the clauses, and the ``cleanup`` flag is not set, then
+unwinding continues further up the call stack.
+
+The ``landingpad`` instruction has several restrictions:
+
+-  A landing pad block is a basic block which is the unwind destination
+   of an '``invoke``' instruction.
+-  A landing pad block must have a '``landingpad``' instruction as its
+   first non-PHI instruction.
+-  There can be only one '``landingpad``' instruction within the landing
+   pad block.
+-  A basic block that is not a landing pad block may not include a
+   '``landingpad``' instruction.
+-  All '``landingpad``' instructions in a function must have the same
+   personality function.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      ;; A landing pad which can catch an integer.
+      %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+               catch i8** @_ZTIi
+      ;; A landing pad that is a cleanup.
+      %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+               cleanup
+      ;; A landing pad which can catch an integer and can only throw a double.
+      %res = landingpad { i8*, i32 } personality i32 (...)* @__gxx_personality_v0
+               catch i8** @_ZTIi
+               filter [1 x i8**] [@_ZTId]
+
+.. _intrinsics:
+
+Intrinsic Functions
+===================
+
+LLVM supports the notion of an "intrinsic function". These functions
+have well known names and semantics and are required to follow certain
+restrictions. Overall, these intrinsics represent an extension mechanism
+for the LLVM language that does not require changing all of the
+transformations in LLVM when adding to the language (or the bitcode
+reader/writer, the parser, etc...).
+
+Intrinsic function names must all start with an "``llvm.``" prefix. This
+prefix is reserved in LLVM for intrinsic names; thus, function names may
+not begin with this prefix. Intrinsic functions must always be external
+functions: you cannot define the body of intrinsic functions. Intrinsic
+functions may only be used in call or invoke instructions: it is illegal
+to take the address of an intrinsic function. Additionally, because
+intrinsic functions are part of the LLVM language, it is required if any
+are added that they be documented here.
+
+Some intrinsic functions can be overloaded, i.e., the intrinsic
+represents a family of functions that perform the same operation but on
+different data types. Because LLVM can represent over 8 million
+different integer types, overloading is used commonly to allow an
+intrinsic function to operate on any integer type. One or more of the
+argument types or the result type can be overloaded to accept any
+integer type. Argument types may also be defined as exactly matching a
+previous argument's type or the result type. This allows an intrinsic
+function which accepts multiple arguments, but needs all of them to be
+of the same type, to only be overloaded with respect to a single
+argument or the result.
+
+Overloaded intrinsics will have the names of its overloaded argument
+types encoded into its function name, each preceded by a period. Only
+those types which are overloaded result in a name suffix. Arguments
+whose type is matched against another type do not. For example, the
+``llvm.ctpop`` function can take an integer of any width and returns an
+integer of exactly the same integer width. This leads to a family of
+functions such as ``i8 @llvm.ctpop.i8(i8 %val)`` and
+``i29 @llvm.ctpop.i29(i29 %val)``. Only one type, the return type, is
+overloaded, and only one type suffix is required. Because the argument's
+type is matched against the return type, it does not require its own
+name suffix.
+
+To learn how to add an intrinsic function, please see the `Extending
+LLVM Guide <ExtendingLLVM.html>`_.
+
+.. _int_varargs:
+
+Variable Argument Handling Intrinsics
+-------------------------------------
+
+Variable argument support is defined in LLVM with the
+:ref:`va_arg <i_va_arg>` instruction and these three intrinsic
+functions. These functions are related to the similarly named macros
+defined in the ``<stdarg.h>`` header file.
+
+All of these functions operate on arguments that use a target-specific
+value type "``va_list``". The LLVM assembly language reference manual
+does not define what this type is, so all transformations should be
+prepared to handle these functions regardless of the type used.
+
+This example shows how the :ref:`va_arg <i_va_arg>` instruction and the
+variable argument handling intrinsic functions are used.
+
+.. code-block:: llvm
+
+    define i32 @test(i32 %X, ...) {
+      ; Initialize variable argument processing
+      %ap = alloca i8*
+      %ap2 = bitcast i8** %ap to i8*
+      call void @llvm.va_start(i8* %ap2)
+
+      ; Read a single integer argument
+      %tmp = va_arg i8** %ap, i32
+
+      ; Demonstrate usage of llvm.va_copy and llvm.va_end
+      %aq = alloca i8*
+      %aq2 = bitcast i8** %aq to i8*
+      call void @llvm.va_copy(i8* %aq2, i8* %ap2)
+      call void @llvm.va_end(i8* %aq2)
+
+      ; Stop processing of arguments.
+      call void @llvm.va_end(i8* %ap2)
+      ret i32 %tmp
+    }
+
+    declare void @llvm.va_start(i8*)
+    declare void @llvm.va_copy(i8*, i8*)
+    declare void @llvm.va_end(i8*)
+
+.. _int_va_start:
+
+'``llvm.va_start``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void %llvm.va_start(i8* <arglist>)
+
+Overview:
+"""""""""
+
+The '``llvm.va_start``' intrinsic initializes ``*<arglist>`` for
+subsequent use by ``va_arg``.
+
+Arguments:
+""""""""""
+
+The argument is a pointer to a ``va_list`` element to initialize.
+
+Semantics:
+""""""""""
+
+The '``llvm.va_start``' intrinsic works just like the ``va_start`` macro
+available in C. In a target-dependent way, it initializes the
+``va_list`` element to which the argument points, so that the next call
+to ``va_arg`` will produce the first variable argument passed to the
+function. Unlike the C ``va_start`` macro, this intrinsic does not need
+to know the last argument of the function as the compiler can figure
+that out.
+
+'``llvm.va_end``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.va_end(i8* <arglist>)
+
+Overview:
+"""""""""
+
+The '``llvm.va_end``' intrinsic destroys ``*<arglist>``, which has been
+initialized previously with ``llvm.va_start`` or ``llvm.va_copy``.
+
+Arguments:
+""""""""""
+
+The argument is a pointer to a ``va_list`` to destroy.
+
+Semantics:
+""""""""""
+
+The '``llvm.va_end``' intrinsic works just like the ``va_end`` macro
+available in C. In a target-dependent way, it destroys the ``va_list``
+element to which the argument points. Calls to
+:ref:`llvm.va_start <int_va_start>` and
+:ref:`llvm.va_copy <int_va_copy>` must be matched exactly with calls to
+``llvm.va_end``.
+
+.. _int_va_copy:
+
+'``llvm.va_copy``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.va_copy(i8* <destarglist>, i8* <srcarglist>)
+
+Overview:
+"""""""""
+
+The '``llvm.va_copy``' intrinsic copies the current argument position
+from the source argument list to the destination argument list.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to a ``va_list`` element to initialize.
+The second argument is a pointer to a ``va_list`` element to copy from.
+
+Semantics:
+""""""""""
+
+The '``llvm.va_copy``' intrinsic works just like the ``va_copy`` macro
+available in C. In a target-dependent way, it copies the source
+``va_list`` element into the destination ``va_list`` element. This
+intrinsic is necessary because the `` llvm.va_start`` intrinsic may be
+arbitrarily complex and require, for example, memory allocation.
+
+Accurate Garbage Collection Intrinsics
+--------------------------------------
+
+LLVM support for `Accurate Garbage Collection <GarbageCollection.html>`_
+(GC) requires the implementation and generation of these intrinsics.
+These intrinsics allow identification of :ref:`GC roots on the
+stack <int_gcroot>`, as well as garbage collector implementations that
+require :ref:`read <int_gcread>` and :ref:`write <int_gcwrite>` barriers.
+Front-ends for type-safe garbage collected languages should generate
+these intrinsics to make use of the LLVM garbage collectors. For more
+details, see `Accurate Garbage Collection with
+LLVM <GarbageCollection.html>`_.
+
+The garbage collection intrinsics only operate on objects in the generic
+address space (address space zero).
+
+.. _int_gcroot:
+
+'``llvm.gcroot``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
+
+Overview:
+"""""""""
+
+The '``llvm.gcroot``' intrinsic declares the existence of a GC root to
+the code generator, and allows some metadata to be associated with it.
+
+Arguments:
+""""""""""
+
+The first argument specifies the address of a stack object that contains
+the root pointer. The second pointer (which must be either a constant or
+a global value address) contains the meta-data to be associated with the
+root.
+
+Semantics:
+""""""""""
+
+At runtime, a call to this intrinsic stores a null pointer into the
+"ptrloc" location. At compile-time, the code generator generates
+information to allow the runtime to find the pointer at GC safe points.
+The '``llvm.gcroot``' intrinsic may only be used in a function which
+:ref:`specifies a GC algorithm <gc>`.
+
+.. _int_gcread:
+
+'``llvm.gcread``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i8* @llvm.gcread(i8* %ObjPtr, i8** %Ptr)
+
+Overview:
+"""""""""
+
+The '``llvm.gcread``' intrinsic identifies reads of references from heap
+locations, allowing garbage collector implementations that require read
+barriers.
+
+Arguments:
+""""""""""
+
+The second argument is the address to read from, which should be an
+address allocated from the garbage collector. The first object is a
+pointer to the start of the referenced object, if needed by the language
+runtime (otherwise null).
+
+Semantics:
+""""""""""
+
+The '``llvm.gcread``' intrinsic has the same semantics as a load
+instruction, but may be replaced with substantially more complex code by
+the garbage collector runtime, as needed. The '``llvm.gcread``'
+intrinsic may only be used in a function which :ref:`specifies a GC
+algorithm <gc>`.
+
+.. _int_gcwrite:
+
+'``llvm.gcwrite``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.gcwrite(i8* %P1, i8* %Obj, i8** %P2)
+
+Overview:
+"""""""""
+
+The '``llvm.gcwrite``' intrinsic identifies writes of references to heap
+locations, allowing garbage collector implementations that require write
+barriers (such as generational or reference counting collectors).
+
+Arguments:
+""""""""""
+
+The first argument is the reference to store, the second is the start of
+the object to store it to, and the third is the address of the field of
+Obj to store to. If the runtime does not require a pointer to the
+object, Obj may be null.
+
+Semantics:
+""""""""""
+
+The '``llvm.gcwrite``' intrinsic has the same semantics as a store
+instruction, but may be replaced with substantially more complex code by
+the garbage collector runtime, as needed. The '``llvm.gcwrite``'
+intrinsic may only be used in a function which :ref:`specifies a GC
+algorithm <gc>`.
+
+Code Generator Intrinsics
+-------------------------
+
+These intrinsics are provided by LLVM to expose special features that
+may only be implemented with code generator support.
+
+'``llvm.returnaddress``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i8  *@llvm.returnaddress(i32 <level>)
+
+Overview:
+"""""""""
+
+The '``llvm.returnaddress``' intrinsic attempts to compute a
+target-specific value indicating the return address of the current
+function or one of its callers.
+
+Arguments:
+""""""""""
+
+The argument to this intrinsic indicates which function to return the
+address for. Zero indicates the calling function, one indicates its
+caller, etc. The argument is **required** to be a constant integer
+value.
+
+Semantics:
+""""""""""
+
+The '``llvm.returnaddress``' intrinsic either returns a pointer
+indicating the return address of the specified call frame, or zero if it
+cannot be identified. The value returned by this intrinsic is likely to
+be incorrect or 0 for arguments other than zero, so it should only be
+used for debugging purposes.
+
+Note that calling this intrinsic does not prevent function inlining or
+other aggressive transformations, so the value returned may not be that
+of the obvious source-language caller.
+
+'``llvm.frameaddress``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i8* @llvm.frameaddress(i32 <level>)
+
+Overview:
+"""""""""
+
+The '``llvm.frameaddress``' intrinsic attempts to return the
+target-specific frame pointer value for the specified stack frame.
+
+Arguments:
+""""""""""
+
+The argument to this intrinsic indicates which function to return the
+frame pointer for. Zero indicates the calling function, one indicates
+its caller, etc. The argument is **required** to be a constant integer
+value.
+
+Semantics:
+""""""""""
+
+The '``llvm.frameaddress``' intrinsic either returns a pointer
+indicating the frame address of the specified call frame, or zero if it
+cannot be identified. The value returned by this intrinsic is likely to
+be incorrect or 0 for arguments other than zero, so it should only be
+used for debugging purposes.
+
+Note that calling this intrinsic does not prevent function inlining or
+other aggressive transformations, so the value returned may not be that
+of the obvious source-language caller.
+
+.. _int_stacksave:
+
+'``llvm.stacksave``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i8* @llvm.stacksave()
+
+Overview:
+"""""""""
+
+The '``llvm.stacksave``' intrinsic is used to remember the current state
+of the function stack, for use with
+:ref:`llvm.stackrestore <int_stackrestore>`. This is useful for
+implementing language features like scoped automatic variable sized
+arrays in C99.
+
+Semantics:
+""""""""""
+
+This intrinsic returns a opaque pointer value that can be passed to
+:ref:`llvm.stackrestore <int_stackrestore>`. When an
+``llvm.stackrestore`` intrinsic is executed with a value saved from
+``llvm.stacksave``, it effectively restores the state of the stack to
+the state it was in when the ``llvm.stacksave`` intrinsic executed. In
+practice, this pops any :ref:`alloca <i_alloca>` blocks from the stack that
+were allocated after the ``llvm.stacksave`` was executed.
+
+.. _int_stackrestore:
+
+'``llvm.stackrestore``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.stackrestore(i8* %ptr)
+
+Overview:
+"""""""""
+
+The '``llvm.stackrestore``' intrinsic is used to restore the state of
+the function stack to the state it was in when the corresponding
+:ref:`llvm.stacksave <int_stacksave>` intrinsic executed. This is
+useful for implementing language features like scoped automatic variable
+sized arrays in C99.
+
+Semantics:
+""""""""""
+
+See the description for :ref:`llvm.stacksave <int_stacksave>`.
+
+'``llvm.prefetch``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.prefetch(i8* <address>, i32 <rw>, i32 <locality>, i32 <cache type>)
+
+Overview:
+"""""""""
+
+The '``llvm.prefetch``' intrinsic is a hint to the code generator to
+insert a prefetch instruction if supported; otherwise, it is a noop.
+Prefetches have no effect on the behavior of the program but can change
+its performance characteristics.
+
+Arguments:
+""""""""""
+
+``address`` is the address to be prefetched, ``rw`` is the specifier
+determining if the fetch should be for a read (0) or write (1), and
+``locality`` is a temporal locality specifier ranging from (0) - no
+locality, to (3) - extremely local keep in cache. The ``cache type``
+specifies whether the prefetch is performed on the data (1) or
+instruction (0) cache. The ``rw``, ``locality`` and ``cache type``
+arguments must be constant integers.
+
+Semantics:
+""""""""""
+
+This intrinsic does not modify the behavior of the program. In
+particular, prefetches cannot trap and do not produce a value. On
+targets that support this intrinsic, the prefetch can provide hints to
+the processor cache for better performance.
+
+'``llvm.pcmarker``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.pcmarker(i32 <id>)
+
+Overview:
+"""""""""
+
+The '``llvm.pcmarker``' intrinsic is a method to export a Program
+Counter (PC) in a region of code to simulators and other tools. The
+method is target specific, but it is expected that the marker will use
+exported symbols to transmit the PC of the marker. The marker makes no
+guarantees that it will remain with any specific instruction after
+optimizations. It is possible that the presence of a marker will inhibit
+optimizations. The intended use is to be inserted after optimizations to
+allow correlations of simulation runs.
+
+Arguments:
+""""""""""
+
+``id`` is a numerical id identifying the marker.
+
+Semantics:
+""""""""""
+
+This intrinsic does not modify the behavior of the program. Backends
+that do not support this intrinsic may ignore it.
+
+'``llvm.readcyclecounter``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i64 @llvm.readcyclecounter()
+
+Overview:
+"""""""""
+
+The '``llvm.readcyclecounter``' intrinsic provides access to the cycle
+counter register (or similar low latency, high accuracy clocks) on those
+targets that support it. On X86, it should map to RDTSC. On Alpha, it
+should map to RPCC. As the backing counters overflow quickly (on the
+order of 9 seconds on alpha), this should only be used for small
+timings.
+
+Semantics:
+""""""""""
+
+When directly supported, reading the cycle counter should not modify any
+memory. Implementations are allowed to either return a application
+specific value or a system wide value. On backends without support, this
+is lowered to a constant 0.
+
+Standard C Library Intrinsics
+-----------------------------
+
+LLVM provides intrinsics for a few important standard C library
+functions. These intrinsics allow source-language front-ends to pass
+information about the alignment of the pointer arguments to the code
+generator, providing opportunity for more efficient code generation.
+
+.. _int_memcpy:
+
+'``llvm.memcpy``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.memcpy`` on any
+integer bit width and for different address spaces. Not all targets
+support all bit widths however.
+
+::
+
+      declare void @llvm.memcpy.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
+                                              i32 <len>, i32 <align>, i1 <isvolatile>)
+      declare void @llvm.memcpy.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
+                                              i64 <len>, i32 <align>, i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memcpy.*``' intrinsics copy a block of memory from the
+source location to the destination location.
+
+Note that, unlike the standard libc function, the ``llvm.memcpy.*``
+intrinsics do not return a value, takes extra alignment/isvolatile
+arguments and the pointers can be in specified address spaces.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to the destination, the second is a
+pointer to the source. The third argument is an integer argument
+specifying the number of bytes to copy, the fourth argument is the
+alignment of the source and destination locations, and the fifth is a
+boolean indicating a volatile access.
+
+If the call to this intrinsic has an alignment value that is not 0 or 1,
+then the caller guarantees that both the source and destination pointers
+are aligned to that boundary.
+
+If the ``isvolatile`` parameter is ``true``, the ``llvm.memcpy`` call is
+a :ref:`volatile operation <volatile>`. The detailed access behavior is not
+very cleanly specified and it is unwise to depend on it.
+
+Semantics:
+""""""""""
+
+The '``llvm.memcpy.*``' intrinsics copy a block of memory from the
+source location to the destination location, which are not allowed to
+overlap. It copies "len" bytes of memory over. If the argument is known
+to be aligned to some boundary, this can be specified as the fourth
+argument, otherwise it should be set to 0 or 1.
+
+'``llvm.memmove``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use llvm.memmove on any integer
+bit width and for different address space. Not all targets support all
+bit widths however.
+
+::
+
+      declare void @llvm.memmove.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
+                                               i32 <len>, i32 <align>, i1 <isvolatile>)
+      declare void @llvm.memmove.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
+                                               i64 <len>, i32 <align>, i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memmove.*``' intrinsics move a block of memory from the
+source location to the destination location. It is similar to the
+'``llvm.memcpy``' intrinsic but allows the two memory locations to
+overlap.
+
+Note that, unlike the standard libc function, the ``llvm.memmove.*``
+intrinsics do not return a value, takes extra alignment/isvolatile
+arguments and the pointers can be in specified address spaces.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to the destination, the second is a
+pointer to the source. The third argument is an integer argument
+specifying the number of bytes to copy, the fourth argument is the
+alignment of the source and destination locations, and the fifth is a
+boolean indicating a volatile access.
+
+If the call to this intrinsic has an alignment value that is not 0 or 1,
+then the caller guarantees that the source and destination pointers are
+aligned to that boundary.
+
+If the ``isvolatile`` parameter is ``true``, the ``llvm.memmove`` call
+is a :ref:`volatile operation <volatile>`. The detailed access behavior is
+not very cleanly specified and it is unwise to depend on it.
+
+Semantics:
+""""""""""
+
+The '``llvm.memmove.*``' intrinsics copy a block of memory from the
+source location to the destination location, which may overlap. It
+copies "len" bytes of memory over. If the argument is known to be
+aligned to some boundary, this can be specified as the fourth argument,
+otherwise it should be set to 0 or 1.
+
+'``llvm.memset.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use llvm.memset on any integer
+bit width and for different address spaces. However, not all targets
+support all bit widths.
+
+::
+
+      declare void @llvm.memset.p0i8.i32(i8* <dest>, i8 <val>,
+                                         i32 <len>, i32 <align>, i1 <isvolatile>)
+      declare void @llvm.memset.p0i8.i64(i8* <dest>, i8 <val>,
+                                         i64 <len>, i32 <align>, i1 <isvolatile>)
+
+Overview:
+"""""""""
+
+The '``llvm.memset.*``' intrinsics fill a block of memory with a
+particular byte value.
+
+Note that, unlike the standard libc function, the ``llvm.memset``
+intrinsic does not return a value and takes extra alignment/volatile
+arguments. Also, the destination can be in an arbitrary address space.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to the destination to fill, the second
+is the byte value with which to fill it, the third argument is an
+integer argument specifying the number of bytes to fill, and the fourth
+argument is the known alignment of the destination location.
+
+If the call to this intrinsic has an alignment value that is not 0 or 1,
+then the caller guarantees that the destination pointer is aligned to
+that boundary.
+
+If the ``isvolatile`` parameter is ``true``, the ``llvm.memset`` call is
+a :ref:`volatile operation <volatile>`. The detailed access behavior is not
+very cleanly specified and it is unwise to depend on it.
+
+Semantics:
+""""""""""
+
+The '``llvm.memset.*``' intrinsics fill "len" bytes of memory starting
+at the destination location. If the argument is known to be aligned to
+some boundary, this can be specified as the fourth argument, otherwise
+it should be set to 0 or 1.
+
+'``llvm.sqrt.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.sqrt`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.sqrt.f32(float %Val)
+      declare double    @llvm.sqrt.f64(double %Val)
+      declare x86_fp80  @llvm.sqrt.f80(x86_fp80 %Val)
+      declare fp128     @llvm.sqrt.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.sqrt.ppcf128(ppc_fp128 %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.sqrt``' intrinsics return the sqrt of the specified operand,
+returning the same value as the libm '``sqrt``' functions would. Unlike
+``sqrt`` in libm, however, ``llvm.sqrt`` has undefined behavior for
+negative numbers other than -0.0 (which allows for better optimization,
+because there is no need to worry about errno being set).
+``llvm.sqrt(-0.0)`` is defined to return -0.0 like IEEE sqrt.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the sqrt of the specified operand if it is a
+nonnegative floating point number.
+
+'``llvm.powi.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.powi`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.powi.f32(float  %Val, i32 %power)
+      declare double    @llvm.powi.f64(double %Val, i32 %power)
+      declare x86_fp80  @llvm.powi.f80(x86_fp80  %Val, i32 %power)
+      declare fp128     @llvm.powi.f128(fp128 %Val, i32 %power)
+      declare ppc_fp128 @llvm.powi.ppcf128(ppc_fp128  %Val, i32 %power)
+
+Overview:
+"""""""""
+
+The '``llvm.powi.*``' intrinsics return the first operand raised to the
+specified (positive or negative) power. The order of evaluation of
+multiplications is not defined. When a vector of floating point type is
+used, the second argument remains a scalar integer value.
+
+Arguments:
+""""""""""
+
+The second argument is an integer power, and the first is a value to
+raise to that power.
+
+Semantics:
+""""""""""
+
+This function returns the first value raised to the second power with an
+unspecified sequence of rounding operations.
+
+'``llvm.sin.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.sin`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.sin.f32(float  %Val)
+      declare double    @llvm.sin.f64(double %Val)
+      declare x86_fp80  @llvm.sin.f80(x86_fp80  %Val)
+      declare fp128     @llvm.sin.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.sin.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.sin.*``' intrinsics return the sine of the operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the sine of the specified operand, returning the
+same values as the libm ``sin`` functions would, and handles error
+conditions in the same way.
+
+'``llvm.cos.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.cos`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.cos.f32(float  %Val)
+      declare double    @llvm.cos.f64(double %Val)
+      declare x86_fp80  @llvm.cos.f80(x86_fp80  %Val)
+      declare fp128     @llvm.cos.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.cos.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.cos.*``' intrinsics return the cosine of the operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the cosine of the specified operand, returning the
+same values as the libm ``cos`` functions would, and handles error
+conditions in the same way.
+
+'``llvm.pow.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.pow`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.pow.f32(float  %Val, float %Power)
+      declare double    @llvm.pow.f64(double %Val, double %Power)
+      declare x86_fp80  @llvm.pow.f80(x86_fp80  %Val, x86_fp80 %Power)
+      declare fp128     @llvm.pow.f128(fp128 %Val, fp128 %Power)
+      declare ppc_fp128 @llvm.pow.ppcf128(ppc_fp128  %Val, ppc_fp128 Power)
+
+Overview:
+"""""""""
+
+The '``llvm.pow.*``' intrinsics return the first operand raised to the
+specified (positive or negative) power.
+
+Arguments:
+""""""""""
+
+The second argument is a floating point power, and the first is a value
+to raise to that power.
+
+Semantics:
+""""""""""
+
+This function returns the first value raised to the second power,
+returning the same values as the libm ``pow`` functions would, and
+handles error conditions in the same way.
+
+'``llvm.exp.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.exp`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.exp.f32(float  %Val)
+      declare double    @llvm.exp.f64(double %Val)
+      declare x86_fp80  @llvm.exp.f80(x86_fp80  %Val)
+      declare fp128     @llvm.exp.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.exp.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.exp.*``' intrinsics perform the exp function.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``exp`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.exp2.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.exp2`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.exp2.f32(float  %Val)
+      declare double    @llvm.exp2.f64(double %Val)
+      declare x86_fp80  @llvm.exp2.f80(x86_fp80  %Val)
+      declare fp128     @llvm.exp2.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.exp2.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.exp2.*``' intrinsics perform the exp2 function.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``exp2`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.log.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.log`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.log.f32(float  %Val)
+      declare double    @llvm.log.f64(double %Val)
+      declare x86_fp80  @llvm.log.f80(x86_fp80  %Val)
+      declare fp128     @llvm.log.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.log.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.log.*``' intrinsics perform the log function.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``log`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.log10.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.log10`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.log10.f32(float  %Val)
+      declare double    @llvm.log10.f64(double %Val)
+      declare x86_fp80  @llvm.log10.f80(x86_fp80  %Val)
+      declare fp128     @llvm.log10.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.log10.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.log10.*``' intrinsics perform the log10 function.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``log10`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.log2.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.log2`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.log2.f32(float  %Val)
+      declare double    @llvm.log2.f64(double %Val)
+      declare x86_fp80  @llvm.log2.f80(x86_fp80  %Val)
+      declare fp128     @llvm.log2.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.log2.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.log2.*``' intrinsics perform the log2 function.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``log2`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.fma.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.fma`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.fma.f32(float  %a, float  %b, float  %c)
+      declare double    @llvm.fma.f64(double %a, double %b, double %c)
+      declare x86_fp80  @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
+      declare fp128     @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
+      declare ppc_fp128 @llvm.fma.ppcf128(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c)
+
+Overview:
+"""""""""
+
+The '``llvm.fma.*``' intrinsics perform the fused multiply-add
+operation.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``fma`` functions
+would.
+
+'``llvm.fabs.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.fabs`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.fabs.f32(float  %Val)
+      declare double    @llvm.fabs.f64(double %Val)
+      declare x86_fp80  @llvm.fabs.f80(x86_fp80  %Val)
+      declare fp128     @llvm.fabs.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.fabs.*``' intrinsics return the absolute value of the
+operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``fabs`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.floor.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.floor`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.floor.f32(float  %Val)
+      declare double    @llvm.floor.f64(double %Val)
+      declare x86_fp80  @llvm.floor.f80(x86_fp80  %Val)
+      declare fp128     @llvm.floor.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.floor.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.floor.*``' intrinsics return the floor of the operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``floor`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.ceil.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.ceil`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.ceil.f32(float  %Val)
+      declare double    @llvm.ceil.f64(double %Val)
+      declare x86_fp80  @llvm.ceil.f80(x86_fp80  %Val)
+      declare fp128     @llvm.ceil.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.ceil.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.ceil.*``' intrinsics return the ceiling of the operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``ceil`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.trunc.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.trunc`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.trunc.f32(float  %Val)
+      declare double    @llvm.trunc.f64(double %Val)
+      declare x86_fp80  @llvm.trunc.f80(x86_fp80  %Val)
+      declare fp128     @llvm.trunc.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.trunc.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.trunc.*``' intrinsics returns the operand rounded to the
+nearest integer not larger in magnitude than the operand.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``trunc`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.rint.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.rint`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.rint.f32(float  %Val)
+      declare double    @llvm.rint.f64(double %Val)
+      declare x86_fp80  @llvm.rint.f80(x86_fp80  %Val)
+      declare fp128     @llvm.rint.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.rint.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.rint.*``' intrinsics returns the operand rounded to the
+nearest integer. It may raise an inexact floating-point exception if the
+operand isn't an integer.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``rint`` functions
+would, and handles error conditions in the same way.
+
+'``llvm.nearbyint.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.nearbyint`` on any
+floating point or vector of floating point type. Not all targets support
+all types however.
+
+::
+
+      declare float     @llvm.nearbyint.f32(float  %Val)
+      declare double    @llvm.nearbyint.f64(double %Val)
+      declare x86_fp80  @llvm.nearbyint.f80(x86_fp80  %Val)
+      declare fp128     @llvm.nearbyint.f128(fp128 %Val)
+      declare ppc_fp128 @llvm.nearbyint.ppcf128(ppc_fp128  %Val)
+
+Overview:
+"""""""""
+
+The '``llvm.nearbyint.*``' intrinsics returns the operand rounded to the
+nearest integer.
+
+Arguments:
+""""""""""
+
+The argument and return value are floating point numbers of the same
+type.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``nearbyint``
+functions would, and handles error conditions in the same way.
+
+Bit Manipulation Intrinsics
+---------------------------
+
+LLVM provides intrinsics for a few important bit manipulation
+operations. These allow efficient code generation for some algorithms.
+
+'``llvm.bswap.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic function. You can use bswap on any
+integer type that is an even number of bytes (i.e. BitWidth % 16 == 0).
+
+::
+
+      declare i16 @llvm.bswap.i16(i16 <id>)
+      declare i32 @llvm.bswap.i32(i32 <id>)
+      declare i64 @llvm.bswap.i64(i64 <id>)
+
+Overview:
+"""""""""
+
+The '``llvm.bswap``' family of intrinsics is used to byte swap integer
+values with an even number of bytes (positive multiple of 16 bits).
+These are useful for performing operations on data that is not in the
+target's native byte order.
+
+Semantics:
+""""""""""
+
+The ``llvm.bswap.i16`` intrinsic returns an i16 value that has the high
+and low byte of the input i16 swapped. Similarly, the ``llvm.bswap.i32``
+intrinsic returns an i32 value that has the four bytes of the input i32
+swapped, so that if the input bytes are numbered 0, 1, 2, 3 then the
+returned i32 will have its bytes in 3, 2, 1, 0 order. The
+``llvm.bswap.i48``, ``llvm.bswap.i64`` and other intrinsics extend this
+concept to additional even-byte lengths (6 bytes, 8 bytes and more,
+respectively).
+
+'``llvm.ctpop.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use llvm.ctpop on any integer
+bit width, or on any vector with integer elements. Not all targets
+support all bit widths or vector types, however.
+
+::
+
+      declare i8 @llvm.ctpop.i8(i8  <src>)
+      declare i16 @llvm.ctpop.i16(i16 <src>)
+      declare i32 @llvm.ctpop.i32(i32 <src>)
+      declare i64 @llvm.ctpop.i64(i64 <src>)
+      declare i256 @llvm.ctpop.i256(i256 <src>)
+      declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32> <src>)
+
+Overview:
+"""""""""
+
+The '``llvm.ctpop``' family of intrinsics counts the number of bits set
+in a value.
+
+Arguments:
+""""""""""
+
+The only argument is the value to be counted. The argument may be of any
+integer type, or a vector with integer elements. The return type must
+match the argument type.
+
+Semantics:
+""""""""""
+
+The '``llvm.ctpop``' intrinsic counts the 1's in a variable, or within
+each element of a vector.
+
+'``llvm.ctlz.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.ctlz`` on any
+integer bit width, or any vector whose elements are integers. Not all
+targets support all bit widths or vector types, however.
+
+::
+
+      declare i8   @llvm.ctlz.i8  (i8   <src>, i1 <is_zero_undef>)
+      declare i16  @llvm.ctlz.i16 (i16  <src>, i1 <is_zero_undef>)
+      declare i32  @llvm.ctlz.i32 (i32  <src>, i1 <is_zero_undef>)
+      declare i64  @llvm.ctlz.i64 (i64  <src>, i1 <is_zero_undef>)
+      declare i256 @llvm.ctlz.i256(i256 <src>, i1 <is_zero_undef>)
+      declase <2 x i32> @llvm.ctlz.v2i32(<2 x i32> <src>, i1 <is_zero_undef>)
+
+Overview:
+"""""""""
+
+The '``llvm.ctlz``' family of intrinsic functions counts the number of
+leading zeros in a variable.
+
+Arguments:
+""""""""""
+
+The first argument is the value to be counted. This argument may be of
+any integer type, or a vectory with integer element type. The return
+type must match the first argument type.
+
+The second argument must be a constant and is a flag to indicate whether
+the intrinsic should ensure that a zero as the first argument produces a
+defined result. Historically some architectures did not provide a
+defined result for zero values as efficiently, and many algorithms are
+now predicated on avoiding zero-value inputs.
+
+Semantics:
+""""""""""
+
+The '``llvm.ctlz``' intrinsic counts the leading (most significant)
+zeros in a variable, or within each element of the vector. If
+``src == 0`` then the result is the size in bits of the type of ``src``
+if ``is_zero_undef == 0`` and ``undef`` otherwise. For example,
+``llvm.ctlz(i32 2) = 30``.
+
+'``llvm.cttz.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.cttz`` on any
+integer bit width, or any vector of integer elements. Not all targets
+support all bit widths or vector types, however.
+
+::
+
+      declare i8   @llvm.cttz.i8  (i8   <src>, i1 <is_zero_undef>)
+      declare i16  @llvm.cttz.i16 (i16  <src>, i1 <is_zero_undef>)
+      declare i32  @llvm.cttz.i32 (i32  <src>, i1 <is_zero_undef>)
+      declare i64  @llvm.cttz.i64 (i64  <src>, i1 <is_zero_undef>)
+      declare i256 @llvm.cttz.i256(i256 <src>, i1 <is_zero_undef>)
+      declase <2 x i32> @llvm.cttz.v2i32(<2 x i32> <src>, i1 <is_zero_undef>)
+
+Overview:
+"""""""""
+
+The '``llvm.cttz``' family of intrinsic functions counts the number of
+trailing zeros.
+
+Arguments:
+""""""""""
+
+The first argument is the value to be counted. This argument may be of
+any integer type, or a vectory with integer element type. The return
+type must match the first argument type.
+
+The second argument must be a constant and is a flag to indicate whether
+the intrinsic should ensure that a zero as the first argument produces a
+defined result. Historically some architectures did not provide a
+defined result for zero values as efficiently, and many algorithms are
+now predicated on avoiding zero-value inputs.
+
+Semantics:
+""""""""""
+
+The '``llvm.cttz``' intrinsic counts the trailing (least significant)
+zeros in a variable, or within each element of a vector. If ``src == 0``
+then the result is the size in bits of the type of ``src`` if
+``is_zero_undef == 0`` and ``undef`` otherwise. For example,
+``llvm.cttz(2) = 1``.
+
+Arithmetic with Overflow Intrinsics
+-----------------------------------
+
+LLVM provides intrinsics for some arithmetic with overflow operations.
+
+'``llvm.sadd.with.overflow.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.sadd.with.overflow``
+on any integer bit width.
+
+::
+
+      declare {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a, i16 %b)
+      declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+      declare {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
+
+Overview:
+"""""""""
+
+The '``llvm.sadd.with.overflow``' family of intrinsic functions perform
+a signed addition of the two arguments, and indicate whether an overflow
+occurred during the signed summation.
+
+Arguments:
+""""""""""
+
+The arguments (%a and %b) and the first element of the result structure
+may be of integer types of any bit width, but they must have the same
+bit width. The second element of the result structure must be of type
+``i1``. ``%a`` and ``%b`` are the two values that will undergo signed
+addition.
+
+Semantics:
+""""""""""
+
+The '``llvm.sadd.with.overflow``' family of intrinsic functions perform
+a signed addition of the two variables. They return a structure — the
+first element of which is the signed summation, and the second element
+of which is a bit specifying if the signed summation resulted in an
+overflow.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %res = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+      %sum = extractvalue {i32, i1} %res, 0
+      %obit = extractvalue {i32, i1} %res, 1
+      br i1 %obit, label %overflow, label %normal
+
+'``llvm.uadd.with.overflow.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.uadd.with.overflow``
+on any integer bit width.
+
+::
+
+      declare {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
+      declare {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+      declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+
+Overview:
+"""""""""
+
+The '``llvm.uadd.with.overflow``' family of intrinsic functions perform
+an unsigned addition of the two arguments, and indicate whether a carry
+occurred during the unsigned summation.
+
+Arguments:
+""""""""""
+
+The arguments (%a and %b) and the first element of the result structure
+may be of integer types of any bit width, but they must have the same
+bit width. The second element of the result structure must be of type
+``i1``. ``%a`` and ``%b`` are the two values that will undergo unsigned
+addition.
+
+Semantics:
+""""""""""
+
+The '``llvm.uadd.with.overflow``' family of intrinsic functions perform
+an unsigned addition of the two arguments. They return a structure — the
+first element of which is the sum, and the second element of which is a
+bit specifying if the unsigned summation resulted in a carry.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %res = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+      %sum = extractvalue {i32, i1} %res, 0
+      %obit = extractvalue {i32, i1} %res, 1
+      br i1 %obit, label %carry, label %normal
+
+'``llvm.ssub.with.overflow.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.ssub.with.overflow``
+on any integer bit width.
+
+::
+
+      declare {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
+      declare {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
+      declare {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
+
+Overview:
+"""""""""
+
+The '``llvm.ssub.with.overflow``' family of intrinsic functions perform
+a signed subtraction of the two arguments, and indicate whether an
+overflow occurred during the signed subtraction.
+
+Arguments:
+""""""""""
+
+The arguments (%a and %b) and the first element of the result structure
+may be of integer types of any bit width, but they must have the same
+bit width. The second element of the result structure must be of type
+``i1``. ``%a`` and ``%b`` are the two values that will undergo signed
+subtraction.
+
+Semantics:
+""""""""""
+
+The '``llvm.ssub.with.overflow``' family of intrinsic functions perform
+a signed subtraction of the two arguments. They return a structure — the
+first element of which is the subtraction, and the second element of
+which is a bit specifying if the signed subtraction resulted in an
+overflow.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %res = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
+      %sum = extractvalue {i32, i1} %res, 0
+      %obit = extractvalue {i32, i1} %res, 1
+      br i1 %obit, label %overflow, label %normal
+
+'``llvm.usub.with.overflow.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.usub.with.overflow``
+on any integer bit width.
+
+::
+
+      declare {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
+      declare {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+      declare {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+
+Overview:
+"""""""""
+
+The '``llvm.usub.with.overflow``' family of intrinsic functions perform
+an unsigned subtraction of the two arguments, and indicate whether an
+overflow occurred during the unsigned subtraction.
+
+Arguments:
+""""""""""
+
+The arguments (%a and %b) and the first element of the result structure
+may be of integer types of any bit width, but they must have the same
+bit width. The second element of the result structure must be of type
+``i1``. ``%a`` and ``%b`` are the two values that will undergo unsigned
+subtraction.
+
+Semantics:
+""""""""""
+
+The '``llvm.usub.with.overflow``' family of intrinsic functions perform
+an unsigned subtraction of the two arguments. They return a structure —
+the first element of which is the subtraction, and the second element of
+which is a bit specifying if the unsigned subtraction resulted in an
+overflow.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %res = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+      %sum = extractvalue {i32, i1} %res, 0
+      %obit = extractvalue {i32, i1} %res, 1
+      br i1 %obit, label %overflow, label %normal
+
+'``llvm.smul.with.overflow.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.smul.with.overflow``
+on any integer bit width.
+
+::
+
+      declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
+      declare {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+      declare {i64, i1} @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
+
+Overview:
+"""""""""
+
+The '``llvm.smul.with.overflow``' family of intrinsic functions perform
+a signed multiplication of the two arguments, and indicate whether an
+overflow occurred during the signed multiplication.
+
+Arguments:
+""""""""""
+
+The arguments (%a and %b) and the first element of the result structure
+may be of integer types of any bit width, but they must have the same
+bit width. The second element of the result structure must be of type
+``i1``. ``%a`` and ``%b`` are the two values that will undergo signed
+multiplication.
+
+Semantics:
+""""""""""
+
+The '``llvm.smul.with.overflow``' family of intrinsic functions perform
+a signed multiplication of the two arguments. They return a structure —
+the first element of which is the multiplication, and the second element
+of which is a bit specifying if the signed multiplication resulted in an
+overflow.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %res = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+      %sum = extractvalue {i32, i1} %res, 0
+      %obit = extractvalue {i32, i1} %res, 1
+      br i1 %obit, label %overflow, label %normal
+
+'``llvm.umul.with.overflow.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.umul.with.overflow``
+on any integer bit width.
+
+::
+
+      declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
+      declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+      declare {i64, i1} @llvm.umul.with.overflow.i64(i64 %a, i64 %b)
+
+Overview:
+"""""""""
+
+The '``llvm.umul.with.overflow``' family of intrinsic functions perform
+a unsigned multiplication of the two arguments, and indicate whether an
+overflow occurred during the unsigned multiplication.
+
+Arguments:
+""""""""""
+
+The arguments (%a and %b) and the first element of the result structure
+may be of integer types of any bit width, but they must have the same
+bit width. The second element of the result structure must be of type
+``i1``. ``%a`` and ``%b`` are the two values that will undergo unsigned
+multiplication.
+
+Semantics:
+""""""""""
+
+The '``llvm.umul.with.overflow``' family of intrinsic functions perform
+an unsigned multiplication of the two arguments. They return a structure
+— the first element of which is the multiplication, and the second
+element of which is a bit specifying if the unsigned multiplication
+resulted in an overflow.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+      %sum = extractvalue {i32, i1} %res, 0
+      %obit = extractvalue {i32, i1} %res, 1
+      br i1 %obit, label %overflow, label %normal
+
+Specialised Arithmetic Intrinsics
+---------------------------------
+
+'``llvm.fmuladd.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
+      declare double @llvm.fmuladd.f64(double %a, double %b, double %c)
+
+Overview:
+"""""""""
+
+The '``llvm.fmuladd.*``' intrinsic functions represent multiply-add
+expressions that can be fused if the code generator determines that the
+fused expression would be legal and efficient.
+
+Arguments:
+""""""""""
+
+The '``llvm.fmuladd.*``' intrinsics each take three arguments: two
+multiplicands, a and b, and an addend c.
+
+Semantics:
+""""""""""
+
+The expression:
+
+::
+
+      %0 = call float @llvm.fmuladd.f32(%a, %b, %c)
+
+is equivalent to the expression a \* b + c, except that rounding will
+not be performed between the multiplication and addition steps if the
+code generator fuses the operations. Fusion is not guaranteed, even if
+the target platform supports it. If a fused multiply-add is required the
+corresponding llvm.fma.\* intrinsic function should be used instead.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %r2 = call float @llvm.fmuladd.f32(float %a, float %b, float %c) ; yields {float}:r2 = (a * b) + c
+
+Half Precision Floating Point Intrinsics
+----------------------------------------
+
+For most target platforms, half precision floating point is a
+storage-only format. This means that it is a dense encoding (in memory)
+but does not support computation in the format.
+
+This means that code must first load the half-precision floating point
+value as an i16, then convert it to float with
+:ref:`llvm.convert.from.fp16 <int_convert_from_fp16>`. Computation can
+then be performed on the float value (including extending to double
+etc). To store the value back to memory, it is first converted to float
+if needed, then converted to i16 with
+:ref:`llvm.convert.to.fp16 <int_convert_to_fp16>`, then storing as an
+i16 value.
+
+.. _int_convert_to_fp16:
+
+'``llvm.convert.to.fp16``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i16 @llvm.convert.to.fp16(f32 %a)
+
+Overview:
+"""""""""
+
+The '``llvm.convert.to.fp16``' intrinsic function performs a conversion
+from single precision floating point format to half precision floating
+point format.
+
+Arguments:
+""""""""""
+
+The intrinsic function contains single argument - the value to be
+converted.
+
+Semantics:
+""""""""""
+
+The '``llvm.convert.to.fp16``' intrinsic function performs a conversion
+from single precision floating point format to half precision floating
+point format. The return value is an ``i16`` which contains the
+converted number.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %res = call i16 @llvm.convert.to.fp16(f32 %a)
+      store i16 %res, i16* @x, align 2
+
+.. _int_convert_from_fp16:
+
+'``llvm.convert.from.fp16``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare f32 @llvm.convert.from.fp16(i16 %a)
+
+Overview:
+"""""""""
+
+The '``llvm.convert.from.fp16``' intrinsic function performs a
+conversion from half precision floating point format to single precision
+floating point format.
+
+Arguments:
+""""""""""
+
+The intrinsic function contains single argument - the value to be
+converted.
+
+Semantics:
+""""""""""
+
+The '``llvm.convert.from.fp16``' intrinsic function performs a
+conversion from half single precision floating point format to single
+precision floating point format. The input half-float value is
+represented by an ``i16`` value.
+
+Examples:
+"""""""""
+
+.. code-block:: llvm
+
+      %a = load i16* @x, align 2
+      %res = call f32 @llvm.convert.from.fp16(i16 %a)
+
+Debugger Intrinsics
+-------------------
+
+The LLVM debugger intrinsics (which all start with ``llvm.dbg.``
+prefix), are described in the `LLVM Source Level
+Debugging <SourceLevelDebugging.html#format_common_intrinsics>`_
+document.
+
+Exception Handling Intrinsics
+-----------------------------
+
+The LLVM exception handling intrinsics (which all start with
+``llvm.eh.`` prefix), are described in the `LLVM Exception
+Handling <ExceptionHandling.html#format_common_intrinsics>`_ document.
+
+.. _int_trampoline:
+
+Trampoline Intrinsics
+---------------------
+
+These intrinsics make it possible to excise one parameter, marked with
+the :ref:`nest <nest>` attribute, from a function. The result is a
+callable function pointer lacking the nest parameter - the caller does
+not need to provide a value for it. Instead, the value to use is stored
+in advance in a "trampoline", a block of memory usually allocated on the
+stack, which also contains code to splice the nest value into the
+argument list. This is used to implement the GCC nested function address
+extension.
+
+For example, if the function is ``i32 f(i8* nest %c, i32 %x, i32 %y)``
+then the resulting function pointer has signature ``i32 (i32, i32)*``.
+It can be created as follows:
+
+.. code-block:: llvm
+
+      %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
+      %tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0
+      call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8*, i32, i32)* @f to i8*), i8* %nval)
+      %p = call i8* @llvm.adjust.trampoline(i8* %tramp1)
+      %fp = bitcast i8* %p to i32 (i32, i32)*
+
+The call ``%val = call i32 %fp(i32 %x, i32 %y)`` is then equivalent to
+``%val = call i32 %f(i8* %nval, i32 %x, i32 %y)``.
+
+.. _int_it:
+
+'``llvm.init.trampoline``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <nval>)
+
+Overview:
+"""""""""
+
+This fills the memory pointed to by ``tramp`` with executable code,
+turning it into a trampoline.
+
+Arguments:
+""""""""""
+
+The ``llvm.init.trampoline`` intrinsic takes three arguments, all
+pointers. The ``tramp`` argument must point to a sufficiently large and
+sufficiently aligned block of memory; this memory is written to by the
+intrinsic. Note that the size and the alignment are target-specific -
+LLVM currently provides no portable way of determining them, so a
+front-end that generates this intrinsic needs to have some
+target-specific knowledge. The ``func`` argument must hold a function
+bitcast to an ``i8*``.
+
+Semantics:
+""""""""""
+
+The block of memory pointed to by ``tramp`` is filled with target
+dependent code, turning it into a function. Then ``tramp`` needs to be
+passed to :ref:`llvm.adjust.trampoline <int_at>` to get a pointer which can
+be :ref:`bitcast (to a new function) and called <int_trampoline>`. The new
+function's signature is the same as that of ``func`` with any arguments
+marked with the ``nest`` attribute removed. At most one such ``nest``
+argument is allowed, and it must be of pointer type. Calling the new
+function is equivalent to calling ``func`` with the same argument list,
+but with ``nval`` used for the missing ``nest`` argument. If, after
+calling ``llvm.init.trampoline``, the memory pointed to by ``tramp`` is
+modified, then the effect of any later call to the returned function
+pointer is undefined.
+
+.. _int_at:
+
+'``llvm.adjust.trampoline``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i8* @llvm.adjust.trampoline(i8* <tramp>)
+
+Overview:
+"""""""""
+
+This performs any required machine-specific adjustment to the address of
+a trampoline (passed as ``tramp``).
+
+Arguments:
+""""""""""
+
+``tramp`` must point to a block of memory which already has trampoline
+code filled in by a previous call to
+:ref:`llvm.init.trampoline <int_it>`.
+
+Semantics:
+""""""""""
+
+On some architectures the address of the code to be executed needs to be
+different to the address where the trampoline is actually stored. This
+intrinsic returns the executable address corresponding to ``tramp``
+after performing the required machine specific adjustments. The pointer
+returned can then be :ref:`bitcast and executed <int_trampoline>`.
+
+Memory Use Markers
+------------------
+
+This class of intrinsics exists to information about the lifetime of
+memory objects and ranges where variables are immutable.
+
+'``llvm.lifetime.start``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.lifetime.start(i64 <size>, i8* nocapture <ptr>)
+
+Overview:
+"""""""""
+
+The '``llvm.lifetime.start``' intrinsic specifies the start of a memory
+object's lifetime.
+
+Arguments:
+""""""""""
+
+The first argument is a constant integer representing the size of the
+object, or -1 if it is variable sized. The second argument is a pointer
+to the object.
+
+Semantics:
+""""""""""
+
+This intrinsic indicates that before this point in the code, the value
+of the memory pointed to by ``ptr`` is dead. This means that it is known
+to never be used and has an undefined value. A load from the pointer
+that precedes this intrinsic can be replaced with ``'undef'``.
+
+'``llvm.lifetime.end``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.lifetime.end(i64 <size>, i8* nocapture <ptr>)
+
+Overview:
+"""""""""
+
+The '``llvm.lifetime.end``' intrinsic specifies the end of a memory
+object's lifetime.
+
+Arguments:
+""""""""""
+
+The first argument is a constant integer representing the size of the
+object, or -1 if it is variable sized. The second argument is a pointer
+to the object.
+
+Semantics:
+""""""""""
+
+This intrinsic indicates that after this point in the code, the value of
+the memory pointed to by ``ptr`` is dead. This means that it is known to
+never be used and has an undefined value. Any stores into the memory
+object following this intrinsic may be removed as dead.
+
+'``llvm.invariant.start``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare {}* @llvm.invariant.start(i64 <size>, i8* nocapture <ptr>)
+
+Overview:
+"""""""""
+
+The '``llvm.invariant.start``' intrinsic specifies that the contents of
+a memory object will not change.
+
+Arguments:
+""""""""""
+
+The first argument is a constant integer representing the size of the
+object, or -1 if it is variable sized. The second argument is a pointer
+to the object.
+
+Semantics:
+""""""""""
+
+This intrinsic indicates that until an ``llvm.invariant.end`` that uses
+the return value, the referenced memory location is constant and
+unchanging.
+
+'``llvm.invariant.end``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.invariant.end({}* <start>, i64 <size>, i8* nocapture <ptr>)
+
+Overview:
+"""""""""
+
+The '``llvm.invariant.end``' intrinsic specifies that the contents of a
+memory object are mutable.
+
+Arguments:
+""""""""""
+
+The first argument is the matching ``llvm.invariant.start`` intrinsic.
+The second argument is a constant integer representing the size of the
+object, or -1 if it is variable sized and the third argument is a
+pointer to the object.
+
+Semantics:
+""""""""""
+
+This intrinsic indicates that the memory is mutable again.
+
+General Intrinsics
+------------------
+
+This class of intrinsics is designed to be generic and has no specific
+purpose.
+
+'``llvm.var.annotation``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.var.annotation(i8* <val>, i8* <str>, i8* <str>, i32  <int>)
+
+Overview:
+"""""""""
+
+The '``llvm.var.annotation``' intrinsic.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to a value, the second is a pointer to a
+global string, the third is a pointer to a global string which is the
+source file name, and the last argument is the line number.
+
+Semantics:
+""""""""""
+
+This intrinsic allows annotation of local variables with arbitrary
+strings. This can be useful for special purpose optimizations that want
+to look for these annotations. These have no other defined use; they are
+ignored by code generation and optimization.
+
+'``llvm.annotation.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. You can use '``llvm.annotation``' on
+any integer bit width.
+
+::
+
+      declare i8 @llvm.annotation.i8(i8 <val>, i8* <str>, i8* <str>, i32  <int>)
+      declare i16 @llvm.annotation.i16(i16 <val>, i8* <str>, i8* <str>, i32  <int>)
+      declare i32 @llvm.annotation.i32(i32 <val>, i8* <str>, i8* <str>, i32  <int>)
+      declare i64 @llvm.annotation.i64(i64 <val>, i8* <str>, i8* <str>, i32  <int>)
+      declare i256 @llvm.annotation.i256(i256 <val>, i8* <str>, i8* <str>, i32  <int>)
+
+Overview:
+"""""""""
+
+The '``llvm.annotation``' intrinsic.
+
+Arguments:
+""""""""""
+
+The first argument is an integer value (result of some expression), the
+second is a pointer to a global string, the third is a pointer to a
+global string which is the source file name, and the last argument is
+the line number. It returns the value of the first argument.
+
+Semantics:
+""""""""""
+
+This intrinsic allows annotations to be put on arbitrary expressions
+with arbitrary strings. This can be useful for special purpose
+optimizations that want to look for these annotations. These have no
+other defined use; they are ignored by code generation and optimization.
+
+'``llvm.trap``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.trap() noreturn nounwind
+
+Overview:
+"""""""""
+
+The '``llvm.trap``' intrinsic.
+
+Arguments:
+""""""""""
+
+None.
+
+Semantics:
+""""""""""
+
+This intrinsic is lowered to the target dependent trap instruction. If
+the target does not have a trap instruction, this intrinsic will be
+lowered to a call of the ``abort()`` function.
+
+'``llvm.debugtrap``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.debugtrap() nounwind
+
+Overview:
+"""""""""
+
+The '``llvm.debugtrap``' intrinsic.
+
+Arguments:
+""""""""""
+
+None.
+
+Semantics:
+""""""""""
+
+This intrinsic is lowered to code which is intended to cause an
+execution trap with the intention of requesting the attention of a
+debugger.
+
+'``llvm.stackprotector``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.stackprotector(i8* <guard>, i8** <slot>)
+
+Overview:
+"""""""""
+
+The ``llvm.stackprotector`` intrinsic takes the ``guard`` and stores it
+onto the stack at ``slot``. The stack slot is adjusted to ensure that it
+is placed on the stack before local variables.
+
+Arguments:
+""""""""""
+
+The ``llvm.stackprotector`` intrinsic requires two pointer arguments.
+The first argument is the value loaded from the stack guard
+``@__stack_chk_guard``. The second variable is an ``alloca`` that has
+enough space to hold the value of the guard.
+
+Semantics:
+""""""""""
+
+This intrinsic causes the prologue/epilogue inserter to force the
+position of the ``AllocaInst`` stack slot to be before local variables
+on the stack. This is to ensure that if a local variable on the stack is
+overwritten, it will destroy the value of the guard. When the function
+exits, the guard on the stack is checked against the original guard. If
+they are different, then the program aborts by calling the
+``__stack_chk_fail()`` function.
+
+'``llvm.objectsize``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i32 @llvm.objectsize.i32(i8* <object>, i1 <min>)
+      declare i64 @llvm.objectsize.i64(i8* <object>, i1 <min>)
+
+Overview:
+"""""""""
+
+The ``llvm.objectsize`` intrinsic is designed to provide information to
+the optimizers to determine at compile time whether a) an operation
+(like memcpy) will overflow a buffer that corresponds to an object, or
+b) that a runtime check for overflow isn't necessary. An object in this
+context means an allocation of a specific class, structure, array, or
+other object.
+
+Arguments:
+""""""""""
+
+The ``llvm.objectsize`` intrinsic takes two arguments. The first
+argument is a pointer to or into the ``object``. The second argument is
+a boolean and determines whether ``llvm.objectsize`` returns 0 (if true)
+or -1 (if false) when the object size is unknown. The second argument
+only accepts constants.
+
+Semantics:
+""""""""""
+
+The ``llvm.objectsize`` intrinsic is lowered to a constant representing
+the size of the object concerned. If the size cannot be determined at
+compile time, ``llvm.objectsize`` returns ``i32/i64 -1 or 0`` (depending
+on the ``min`` argument).
+
+'``llvm.expect``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i32 @llvm.expect.i32(i32 <val>, i32 <expected_val>)
+      declare i64 @llvm.expect.i64(i64 <val>, i64 <expected_val>)
+
+Overview:
+"""""""""
+
+The ``llvm.expect`` intrinsic provides information about expected (the
+most probable) value of ``val``, which can be used by optimizers.
+
+Arguments:
+""""""""""
+
+The ``llvm.expect`` intrinsic takes two arguments. The first argument is
+a value. The second argument is an expected value, this needs to be a
+constant value, variables are not allowed.
+
+Semantics:
+""""""""""
+
+This intrinsic is lowered to the ``val``.
+
+'``llvm.donothing``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.donothing() nounwind readnone
+
+Overview:
+"""""""""
+
+The ``llvm.donothing`` intrinsic doesn't perform any operation. It's the
+only intrinsic that can be called with an invoke instruction.
+
+Arguments:
+""""""""""
+
+None.
+
+Semantics:
+""""""""""
+
+This intrinsic does nothing, and it's removed by optimizers and ignored
+by codegen.
diff --git a/docs/Makefile.sphinx b/docs/Makefile.sphinx
index 81c13de9cd..3746522db6 100644
--- a/docs/Makefile.sphinx
+++ b/docs/Makefile.sphinx
@@ -49,7 +49,7 @@ html:
 	@# FIXME: Remove this `cp` once HTML->Sphinx transition is completed.
 	@# Kind of a hack, but HTML-formatted docs are on the way out anyway.
 	@echo "Copying legacy HTML-formatted docs into $(BUILDDIR)/html"
-	@cp -a *.html tutorial $(BUILDDIR)/html
+	@cp -a *.html $(BUILDDIR)/html
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 
 dirhtml:
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
deleted file mode 100644
index 64ddb9d105..0000000000
--- a/docs/ProgrammersManual.html
+++ /dev/null
@@ -1,4156 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
-  <meta http-equiv="Content-type" content="text/html;charset=UTF-8">
-  <title>LLVM Programmer's Manual</title>
-  <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-<body>
-
-<h1>
-  LLVM Programmer's Manual
-</h1>
-
-<ol>
-  <li><a href="#introduction">Introduction</a></li>
-  <li><a href="#general">General Information</a>
-    <ul>
-      <li><a href="#stl">The C++ Standard Template Library</a></li>
-<!--
-      <li>The <tt>-time-passes</tt> option</li>
-      <li>How to use the LLVM Makefile system</li>
-      <li>How to write a regression test</li>
-
---> 
-    </ul>
-  </li>
-  <li><a href="#apis">Important and useful LLVM APIs</a>
-    <ul>
-      <li><a href="#isa">The <tt>isa&lt;&gt;</tt>, <tt>cast&lt;&gt;</tt>
-and <tt>dyn_cast&lt;&gt;</tt> templates</a> </li>
-      <li><a href="#string_apis">Passing strings (the <tt>StringRef</tt>
-and <tt>Twine</tt> classes)</a>
-        <ul>
-          <li><a href="#StringRef">The <tt>StringRef</tt> class</a> </li>
-          <li><a href="#Twine">The <tt>Twine</tt> class</a> </li>
-        </ul>
-      </li>
-      <li><a href="#DEBUG">The <tt>DEBUG()</tt> macro and <tt>-debug</tt>
-option</a>
-        <ul>
-          <li><a href="#DEBUG_TYPE">Fine grained debug info with <tt>DEBUG_TYPE</tt>
-and the <tt>-debug-only</tt> option</a> </li>
-        </ul>
-      </li>
-      <li><a href="#Statistic">The <tt>Statistic</tt> class &amp; <tt>-stats</tt>
-option</a></li>
-<!--
-      <li>The <tt>InstVisitor</tt> template
-      <li>The general graph API
---> 
-      <li><a href="#ViewGraph">Viewing graphs while debugging code</a></li>
-    </ul>
-  </li>
-  <li><a href="#datastructure">Picking the Right Data Structure for a Task</a>
-    <ul>
-    <li><a href="#ds_sequential">Sequential Containers (std::vector, std::list, etc)</a>
-    <ul>
-      <li><a href="#dss_arrayref">llvm/ADT/ArrayRef.h</a></li>
-      <li><a href="#dss_fixedarrays">Fixed Size Arrays</a></li>
-      <li><a href="#dss_heaparrays">Heap Allocated Arrays</a></li>
-      <li><a href="#dss_tinyptrvector">"llvm/ADT/TinyPtrVector.h"</a></li>
-      <li><a href="#dss_smallvector">"llvm/ADT/SmallVector.h"</a></li>
-      <li><a href="#dss_vector">&lt;vector&gt;</a></li>
-      <li><a href="#dss_deque">&lt;deque&gt;</a></li>
-      <li><a href="#dss_list">&lt;list&gt;</a></li>
-      <li><a href="#dss_ilist">llvm/ADT/ilist.h</a></li>
-      <li><a href="#dss_packedvector">llvm/ADT/PackedVector.h</a></li>
-      <li><a href="#dss_other">Other Sequential Container Options</a></li>
-    </ul></li>
-    <li><a href="#ds_string">String-like containers</a>
-    <ul>
-      <li><a href="#dss_stringref">llvm/ADT/StringRef.h</a></li>
-      <li><a href="#dss_twine">llvm/ADT/Twine.h</a></li>
-      <li><a href="#dss_smallstring">llvm/ADT/SmallString.h</a></li>
-      <li><a href="#dss_stdstring">std::string</a></li>
-    </ul></li>
-    <li><a href="#ds_set">Set-Like Containers (std::set, SmallSet, SetVector, etc)</a>
-    <ul>
-      <li><a href="#dss_sortedvectorset">A sorted 'vector'</a></li>
-      <li><a href="#dss_smallset">"llvm/ADT/SmallSet.h"</a></li>
-      <li><a href="#dss_smallptrset">"llvm/ADT/SmallPtrSet.h"</a></li>
-      <li><a href="#dss_denseset">"llvm/ADT/DenseSet.h"</a></li>
-      <li><a href="#dss_sparseset">"llvm/ADT/SparseSet.h"</a></li>
-      <li><a href="#dss_FoldingSet">"llvm/ADT/FoldingSet.h"</a></li>
-      <li><a href="#dss_set">&lt;set&gt;</a></li>
-      <li><a href="#dss_setvector">"llvm/ADT/SetVector.h"</a></li>
-      <li><a href="#dss_uniquevector">"llvm/ADT/UniqueVector.h"</a></li>
-      <li><a href="#dss_immutableset">"llvm/ADT/ImmutableSet.h"</a></li>
-      <li><a href="#dss_otherset">Other Set-Like Container Options</a></li>
-    </ul></li>
-    <li><a href="#ds_map">Map-Like Containers (std::map, DenseMap, etc)</a>
-    <ul>
-      <li><a href="#dss_sortedvectormap">A sorted 'vector'</a></li>
-      <li><a href="#dss_stringmap">"llvm/ADT/StringMap.h"</a></li>
-      <li><a href="#dss_indexedmap">"llvm/ADT/IndexedMap.h"</a></li>
-      <li><a href="#dss_densemap">"llvm/ADT/DenseMap.h"</a></li>
-      <li><a href="#dss_valuemap">"llvm/ADT/ValueMap.h"</a></li>
-      <li><a href="#dss_intervalmap">"llvm/ADT/IntervalMap.h"</a></li>
-      <li><a href="#dss_map">&lt;map&gt;</a></li>
-      <li><a href="#dss_mapvector">"llvm/ADT/MapVector.h"</a></li>
-      <li><a href="#dss_inteqclasses">"llvm/ADT/IntEqClasses.h"</a></li>
-      <li><a href="#dss_immutablemap">"llvm/ADT/ImmutableMap.h"</a></li>
-      <li><a href="#dss_othermap">Other Map-Like Container Options</a></li>
-    </ul></li>
-    <li><a href="#ds_bit">BitVector-like containers</a>
-    <ul>
-      <li><a href="#dss_bitvector">A dense bitvector</a></li>
-      <li><a href="#dss_smallbitvector">A "small" dense bitvector</a></li>
-      <li><a href="#dss_sparsebitvector">A sparse bitvector</a></li>
-    </ul></li>
-  </ul>
-  </li>
-  <li><a href="#common">Helpful Hints for Common Operations</a>
-    <ul>
-      <li><a href="#inspection">Basic Inspection and Traversal Routines</a>
-        <ul>
-          <li><a href="#iterate_function">Iterating over the <tt>BasicBlock</tt>s
-in a <tt>Function</tt></a> </li>
-          <li><a href="#iterate_basicblock">Iterating over the <tt>Instruction</tt>s
-in a <tt>BasicBlock</tt></a> </li>
-          <li><a href="#iterate_institer">Iterating over the <tt>Instruction</tt>s
-in a <tt>Function</tt></a> </li>
-          <li><a href="#iterate_convert">Turning an iterator into a
-class pointer</a> </li>
-          <li><a href="#iterate_complex">Finding call sites: a more
-complex example</a> </li>
-          <li><a href="#calls_and_invokes">Treating calls and invokes
-the same way</a> </li>
-          <li><a href="#iterate_chains">Iterating over def-use &amp;
-use-def chains</a> </li>
-          <li><a href="#iterate_preds">Iterating over predecessors &amp;
-successors of blocks</a></li>
-        </ul>
-      </li>
-      <li><a href="#simplechanges">Making simple changes</a>
-        <ul>
-          <li><a href="#schanges_creating">Creating and inserting new
-		 <tt>Instruction</tt>s</a> </li>
-          <li><a href="#schanges_deleting">Deleting 		 <tt>Instruction</tt>s</a> </li>
-          <li><a href="#schanges_replacing">Replacing an 		 <tt>Instruction</tt>
-with another <tt>Value</tt></a> </li>
-          <li><a href="#schanges_deletingGV">Deleting <tt>GlobalVariable</tt>s</a> </li>  
-        </ul>
-      </li>
-      <li><a href="#create_types">How to Create Types</a></li>
-<!--
-    <li>Working with the Control Flow Graph
-    <ul>
-      <li>Accessing predecessors and successors of a <tt>BasicBlock</tt>
-      <li>
-      <li>
-    </ul>
---> 
-    </ul>
-  </li>
-
-  <li><a href="#threading">Threads and LLVM</a>
-  <ul>
-    <li><a href="#startmultithreaded">Entering and Exiting Multithreaded Mode
-        </a></li>
-    <li><a href="#shutdown">Ending execution with <tt>llvm_shutdown()</tt></a></li>
-    <li><a href="#managedstatic">Lazy initialization with <tt>ManagedStatic</tt></a></li>
-    <li><a href="#llvmcontext">Achieving Isolation with <tt>LLVMContext</tt></a></li>
-    <li><a href="#jitthreading">Threads and the JIT</a></li>
-  </ul>
-  </li>
-
-  <li><a href="#advanced">Advanced Topics</a>
-  <ul>
-
-  <li><a href="#SymbolTable">The <tt>ValueSymbolTable</tt> class</a></li>
-  <li><a href="#UserLayout">The <tt>User</tt> and owned <tt>Use</tt> classes' memory layout</a></li>
-  </ul></li>
-
-  <li><a href="#coreclasses">The Core LLVM Class Hierarchy Reference</a>
-    <ul>
-      <li><a href="#Type">The <tt>Type</tt> class</a> </li>
-      <li><a href="#Module">The <tt>Module</tt> class</a></li>
-      <li><a href="#Value">The <tt>Value</tt> class</a>
-      <ul>
-        <li><a href="#User">The <tt>User</tt> class</a>
-        <ul>
-          <li><a href="#Instruction">The <tt>Instruction</tt> class</a></li>
-          <li><a href="#Constant">The <tt>Constant</tt> class</a>
-          <ul>
-            <li><a href="#GlobalValue">The <tt>GlobalValue</tt> class</a>
-            <ul>
-              <li><a href="#Function">The <tt>Function</tt> class</a></li>
-              <li><a href="#GlobalVariable">The <tt>GlobalVariable</tt> class</a></li>
-            </ul>
-            </li>
-          </ul>
-          </li>
-        </ul>
-        </li>
-        <li><a href="#BasicBlock">The <tt>BasicBlock</tt> class</a></li>
-        <li><a href="#Argument">The <tt>Argument</tt> class</a></li>
-      </ul>
-      </li>
-    </ul>
-  </li>
-</ol>
-
-<div class="doc_author">    
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>, 
-                <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a>, 
-                <a href="mailto:ggreif@gmail.com">Gabor Greif</a>, 
-                <a href="mailto:jstanley@cs.uiuc.edu">Joel Stanley</a>,
-                <a href="mailto:rspencer@x10sys.com">Reid Spencer</a> and
-                <a href="mailto:owen@apple.com">Owen Anderson</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="introduction">Introduction </a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This document is meant to highlight some of the important classes and
-interfaces available in the LLVM source-base.  This manual is not
-intended to explain what LLVM is, how it works, and what LLVM code looks
-like.  It assumes that you know the basics of LLVM and are interested
-in writing transformations or otherwise analyzing or manipulating the
-code.</p>
-
-<p>This document should get you oriented so that you can find your
-way in the continuously growing source code that makes up the LLVM
-infrastructure. Note that this manual is not intended to serve as a
-replacement for reading the source code, so if you think there should be
-a method in one of these classes to do something, but it's not listed,
-check the source.  Links to the <a href="/doxygen/">doxygen</a> sources
-are provided to make this as easy as possible.</p>
-
-<p>The first section of this document describes general information that is
-useful to know when working in the LLVM infrastructure, and the second describes
-the Core LLVM classes.  In the future this manual will be extended with
-information describing how to use extension libraries, such as dominator
-information, CFG traversal routines, and useful utilities like the <tt><a
-href="/doxygen/InstVisitor_8h-source.html">InstVisitor</a></tt> template.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="general">General Information</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This section contains general information that is useful if you are working
-in the LLVM source-base, but that isn't specific to any particular API.</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="stl">The C++ Standard Template Library</a>
-</h3>
-
-<div>
-
-<p>LLVM makes heavy use of the C++ Standard Template Library (STL),
-perhaps much more than you are used to, or have seen before.  Because of
-this, you might want to do a little background reading in the
-techniques used and capabilities of the library.  There are many good
-pages that discuss the STL, and several books on the subject that you
-can get, so it will not be discussed in this document.</p>
-
-<p>Here are some useful links:</p>
-
-<ol>
-
-<li><a href="http://www.dinkumware.com/manuals/#Standard C++ Library">Dinkumware
-C++ Library reference</a> - an excellent reference for the STL and other parts
-of the standard C++ library.</li>
-
-<li><a href="http://www.tempest-sw.com/cpp/">C++ In a Nutshell</a> - This is an
-O'Reilly book in the making.  It has a decent Standard Library
-Reference that rivals Dinkumware's, and is unfortunately no longer free since the
-book has been published.</li>
-
-<li><a href="http://www.parashift.com/c++-faq-lite/">C++ Frequently Asked
-Questions</a></li>
-
-<li><a href="http://www.sgi.com/tech/stl/">SGI's STL Programmer's Guide</a> -
-Contains a useful <a
-href="http://www.sgi.com/tech/stl/stl_introduction.html">Introduction to the
-STL</a>.</li>
-
-<li><a href="http://www.research.att.com/%7Ebs/C++.html">Bjarne Stroustrup's C++
-Page</a></li>
-
-<li><a href="http://64.78.49.204/">
-Bruce Eckel's Thinking in C++, 2nd ed. Volume 2 Revision 4.0 (even better, get
-the book).</a></li>
-
-</ol>
-  
-<p>You are also encouraged to take a look at the <a
-href="CodingStandards.html">LLVM Coding Standards</a> guide which focuses on how
-to write maintainable code more than where to put your curly braces.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="stl">Other useful references</a>
-</h3>
-
-<div>
-
-<ol>
-<li><a href="http://www.fortran-2000.com/ArnaudRecipes/sharedlib.html">Using
-static and shared libraries across platforms</a></li>
-</ol>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="apis">Important and useful LLVM APIs</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Here we highlight some LLVM APIs that are generally useful and good to
-know about when writing transformations.</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="isa">The <tt>isa&lt;&gt;</tt>, <tt>cast&lt;&gt;</tt> and
-  <tt>dyn_cast&lt;&gt;</tt> templates</a>
-</h3>
-
-<div>
-
-<p>The LLVM source-base makes extensive use of a custom form of RTTI.
-These templates have many similarities to the C++ <tt>dynamic_cast&lt;&gt;</tt>
-operator, but they don't have some drawbacks (primarily stemming from
-the fact that <tt>dynamic_cast&lt;&gt;</tt> only works on classes that
-have a v-table). Because they are used so often, you must know what they
-do and how they work. All of these templates are defined in the <a
- href="/doxygen/Casting_8h-source.html"><tt>llvm/Support/Casting.h</tt></a>
-file (note that you very rarely have to include this file directly).</p>
-
-<dl>
-  <dt><tt>isa&lt;&gt;</tt>: </dt>
-
-  <dd><p>The <tt>isa&lt;&gt;</tt> operator works exactly like the Java
-  "<tt>instanceof</tt>" operator.  It returns true or false depending on whether
-  a reference or pointer points to an instance of the specified class.  This can
-  be very useful for constraint checking of various sorts (example below).</p>
-  </dd>
-
-  <dt><tt>cast&lt;&gt;</tt>: </dt>
-
-  <dd><p>The <tt>cast&lt;&gt;</tt> operator is a "checked cast" operation. It
-  converts a pointer or reference from a base class to a derived class, causing
-  an assertion failure if it is not really an instance of the right type.  This
-  should be used in cases where you have some information that makes you believe
-  that something is of the right type.  An example of the <tt>isa&lt;&gt;</tt>
-  and <tt>cast&lt;&gt;</tt> template is:</p>
-
-<div class="doc_code">
-<pre>
-static bool isLoopInvariant(const <a href="#Value">Value</a> *V, const Loop *L) {
-  if (isa&lt;<a href="#Constant">Constant</a>&gt;(V) || isa&lt;<a href="#Argument">Argument</a>&gt;(V) || isa&lt;<a href="#GlobalValue">GlobalValue</a>&gt;(V))
-    return true;
-
-  // <i>Otherwise, it must be an instruction...</i>
-  return !L-&gt;contains(cast&lt;<a href="#Instruction">Instruction</a>&gt;(V)-&gt;getParent());
-}
-</pre>
-</div>
-
-  <p>Note that you should <b>not</b> use an <tt>isa&lt;&gt;</tt> test followed
-  by a <tt>cast&lt;&gt;</tt>, for that use the <tt>dyn_cast&lt;&gt;</tt>
-  operator.</p>
-
-  </dd>
-
-  <dt><tt>dyn_cast&lt;&gt;</tt>:</dt>
-
-  <dd><p>The <tt>dyn_cast&lt;&gt;</tt> operator is a "checking cast" operation.
-  It checks to see if the operand is of the specified type, and if so, returns a
-  pointer to it (this operator does not work with references). If the operand is
-  not of the correct type, a null pointer is returned.  Thus, this works very
-  much like the <tt>dynamic_cast&lt;&gt;</tt> operator in C++, and should be
-  used in the same circumstances.  Typically, the <tt>dyn_cast&lt;&gt;</tt>
-  operator is used in an <tt>if</tt> statement or some other flow control
-  statement like this:</p>
-
-<div class="doc_code">
-<pre>
-if (<a href="#AllocationInst">AllocationInst</a> *AI = dyn_cast&lt;<a href="#AllocationInst">AllocationInst</a>&gt;(Val)) {
-  // <i>...</i>
-}
-</pre>
-</div>
-   
-  <p>This form of the <tt>if</tt> statement effectively combines together a call
-  to <tt>isa&lt;&gt;</tt> and a call to <tt>cast&lt;&gt;</tt> into one
-  statement, which is very convenient.</p>
-
-  <p>Note that the <tt>dyn_cast&lt;&gt;</tt> operator, like C++'s
-  <tt>dynamic_cast&lt;&gt;</tt> or Java's <tt>instanceof</tt> operator, can be
-  abused.  In particular, you should not use big chained <tt>if/then/else</tt>
-  blocks to check for lots of different variants of classes.  If you find
-  yourself wanting to do this, it is much cleaner and more efficient to use the
-  <tt>InstVisitor</tt> class to dispatch over the instruction type directly.</p>
-
-  </dd>
-
-  <dt><tt>cast_or_null&lt;&gt;</tt>: </dt>
-  
-  <dd><p>The <tt>cast_or_null&lt;&gt;</tt> operator works just like the
-  <tt>cast&lt;&gt;</tt> operator, except that it allows for a null pointer as an
-  argument (which it then propagates).  This can sometimes be useful, allowing
-  you to combine several null checks into one.</p></dd>
-
-  <dt><tt>dyn_cast_or_null&lt;&gt;</tt>: </dt>
-
-  <dd><p>The <tt>dyn_cast_or_null&lt;&gt;</tt> operator works just like the
-  <tt>dyn_cast&lt;&gt;</tt> operator, except that it allows for a null pointer
-  as an argument (which it then propagates).  This can sometimes be useful,
-  allowing you to combine several null checks into one.</p></dd>
-
-</dl>
-
-<p>These five templates can be used with any classes, whether they have a
-v-table or not. If you want to add support for these templates, see the
-document <a href="HowToSetUpLLVMStyleRTTI.html">How to set up LLVM-style
-RTTI for your class hierarchy </a>.
-</p>
-
-</div>
-
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="string_apis">Passing strings (the <tt>StringRef</tt>
-and <tt>Twine</tt> classes)</a>
-</h3>
-
-<div>
-
-<p>Although LLVM generally does not do much string manipulation, we do have
-several important APIs which take strings.  Two important examples are the
-Value class -- which has names for instructions, functions, etc. -- and the
-StringMap class which is used extensively in LLVM and Clang.</p>
-
-<p>These are generic classes, and they need to be able to accept strings which
-may have embedded null characters.  Therefore, they cannot simply take
-a <tt>const char *</tt>, and taking a <tt>const std::string&amp;</tt> requires
-clients to perform a heap allocation which is usually unnecessary.  Instead,
-many LLVM APIs use a <tt>StringRef</tt> or a <tt>const Twine&amp;</tt> for
-passing strings efficiently.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="StringRef">The <tt>StringRef</tt> class</a>
-</h4>
-
-<div>
-
-<p>The <tt>StringRef</tt> data type represents a reference to a constant string
-(a character array and a length) and supports the common operations available
-on <tt>std:string</tt>, but does not require heap allocation.</p>
-
-<p>It can be implicitly constructed using a C style null-terminated string,
-an <tt>std::string</tt>, or explicitly with a character pointer and length.
-For example, the <tt>StringRef</tt> find function is declared as:</p>
-
-<pre class="doc_code">
-  iterator find(StringRef Key);
-</pre>
-
-<p>and clients can call it using any one of:</p>
-
-<pre class="doc_code">
-  Map.find("foo");                 <i>// Lookup "foo"</i>
-  Map.find(std::string("bar"));    <i>// Lookup "bar"</i>
-  Map.find(StringRef("\0baz", 4)); <i>// Lookup "\0baz"</i>
-</pre>
-
-<p>Similarly, APIs which need to return a string may return a <tt>StringRef</tt>
-instance, which can be used directly or converted to an <tt>std::string</tt>
-using the <tt>str</tt> member function.  See 
-"<tt><a href="/doxygen/classllvm_1_1StringRef_8h-source.html">llvm/ADT/StringRef.h</a></tt>"
-for more information.</p>
-
-<p>You should rarely use the <tt>StringRef</tt> class directly, because it contains
-pointers to external memory it is not generally safe to store an instance of the
-class (unless you know that the external storage will not be freed). StringRef is
-small and pervasive enough in LLVM that it should always be passed by value.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="Twine">The <tt>Twine</tt> class</a>
-</h4>
-
-<div>
-
-<p>The <tt><a href="/doxygen/classllvm_1_1Twine.html">Twine</a></tt> class is an
-efficient way for APIs to accept concatenated strings.  For example, a common
-LLVM paradigm is to name one instruction based on
-the name of another instruction with a suffix, for example:</p>
-
-<div class="doc_code">
-<pre>
-    New = CmpInst::Create(<i>...</i>, SO->getName() + ".cmp");
-</pre>
-</div>
-
-<p>The <tt>Twine</tt> class is effectively a lightweight
-<a href="http://en.wikipedia.org/wiki/Rope_(computer_science)">rope</a>
-which points to temporary (stack allocated) objects.  Twines can be implicitly
-constructed as the result of the plus operator applied to strings (i.e., a C
-strings, an <tt>std::string</tt>, or a <tt>StringRef</tt>).  The twine delays
-the actual concatenation of strings until it is actually required, at which
-point it can be efficiently rendered directly into a character array.  This
-avoids unnecessary heap allocation involved in constructing the temporary
-results of string concatenation. See
-"<tt><a href="/doxygen/Twine_8h_source.html">llvm/ADT/Twine.h</a></tt>"
-and <a href="#dss_twine">here</a> for more information.</p>
-
-<p>As with a <tt>StringRef</tt>, <tt>Twine</tt> objects point to external memory
-and should almost never be stored or mentioned directly.  They are intended
-solely for use when defining a function which should be able to efficiently
-accept concatenated strings.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="DEBUG">The <tt>DEBUG()</tt> macro and <tt>-debug</tt> option</a>
-</h3>
-
-<div>
-
-<p>Often when working on your pass you will put a bunch of debugging printouts
-and other code into your pass.  After you get it working, you want to remove
-it, but you may need it again in the future (to work out new bugs that you run
-across).</p>
-
-<p> Naturally, because of this, you don't want to delete the debug printouts,
-but you don't want them to always be noisy.  A standard compromise is to comment
-them out, allowing you to enable them if you need them in the future.</p>
-
-<p>The "<tt><a href="/doxygen/Debug_8h-source.html">llvm/Support/Debug.h</a></tt>"
-file provides a macro named <tt>DEBUG()</tt> that is a much nicer solution to
-this problem.  Basically, you can put arbitrary code into the argument of the
-<tt>DEBUG</tt> macro, and it is only executed if '<tt>opt</tt>' (or any other
-tool) is run with the '<tt>-debug</tt>' command line argument:</p>
-
-<div class="doc_code">
-<pre>
-DEBUG(errs() &lt;&lt; "I am here!\n");
-</pre>
-</div>
-
-<p>Then you can run your pass like this:</p>
-
-<div class="doc_code">
-<pre>
-$ opt &lt; a.bc &gt; /dev/null -mypass
-<i>&lt;no output&gt;</i>
-$ opt &lt; a.bc &gt; /dev/null -mypass -debug
-I am here!
-</pre>
-</div>
-
-<p>Using the <tt>DEBUG()</tt> macro instead of a home-brewed solution allows you
-to not have to create "yet another" command line option for the debug output for
-your pass.  Note that <tt>DEBUG()</tt> macros are disabled for optimized builds,
-so they do not cause a performance impact at all (for the same reason, they
-should also not contain side-effects!).</p>
-
-<p>One additional nice thing about the <tt>DEBUG()</tt> macro is that you can
-enable or disable it directly in gdb.  Just use "<tt>set DebugFlag=0</tt>" or
-"<tt>set DebugFlag=1</tt>" from the gdb if the program is running.  If the
-program hasn't been started yet, you can always just run it with
-<tt>-debug</tt>.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="DEBUG_TYPE">Fine grained debug info with <tt>DEBUG_TYPE</tt> and
-  the <tt>-debug-only</tt> option</a>
-</h4>
-
-<div>
-
-<p>Sometimes you may find yourself in a situation where enabling <tt>-debug</tt>
-just turns on <b>too much</b> information (such as when working on the code
-generator).  If you want to enable debug information with more fine-grained
-control, you define the <tt>DEBUG_TYPE</tt> macro and the <tt>-debug</tt> only
-option as follows:</p>
-
-<div class="doc_code">
-<pre>
-#undef  DEBUG_TYPE
-DEBUG(errs() &lt;&lt; "No debug type\n");
-#define DEBUG_TYPE "foo"
-DEBUG(errs() &lt;&lt; "'foo' debug type\n");
-#undef  DEBUG_TYPE
-#define DEBUG_TYPE "bar"
-DEBUG(errs() &lt;&lt; "'bar' debug type\n"));
-#undef  DEBUG_TYPE
-#define DEBUG_TYPE ""
-DEBUG(errs() &lt;&lt; "No debug type (2)\n");
-</pre>
-</div>
-
-<p>Then you can run your pass like this:</p>
-
-<div class="doc_code">
-<pre>
-$ opt &lt; a.bc &gt; /dev/null -mypass
-<i>&lt;no output&gt;</i>
-$ opt &lt; a.bc &gt; /dev/null -mypass -debug
-No debug type
-'foo' debug type
-'bar' debug type
-No debug type (2)
-$ opt &lt; a.bc &gt; /dev/null -mypass -debug-only=foo
-'foo' debug type
-$ opt &lt; a.bc &gt; /dev/null -mypass -debug-only=bar
-'bar' debug type
-</pre>
-</div>
-
-<p>Of course, in practice, you should only set <tt>DEBUG_TYPE</tt> at the top of
-a file, to specify the debug type for the entire module (if you do this before
-you <tt>#include "llvm/Support/Debug.h"</tt>, you don't have to insert the ugly
-<tt>#undef</tt>'s).  Also, you should use names more meaningful than "foo" and
-"bar", because there is no system in place to ensure that names do not
-conflict. If two different modules use the same string, they will all be turned
-on when the name is specified. This allows, for example, all debug information
-for instruction scheduling to be enabled with <tt>-debug-type=InstrSched</tt>,
-even if the source lives in multiple files.</p>
-
-<p>The <tt>DEBUG_WITH_TYPE</tt> macro is also available for situations where you
-would like to set <tt>DEBUG_TYPE</tt>, but only for one specific <tt>DEBUG</tt>
-statement. It takes an additional first parameter, which is the type to use. For
-example, the preceding example could be written as:</p>
-
-
-<div class="doc_code">
-<pre>
-DEBUG_WITH_TYPE("", errs() &lt;&lt; "No debug type\n");
-DEBUG_WITH_TYPE("foo", errs() &lt;&lt; "'foo' debug type\n");
-DEBUG_WITH_TYPE("bar", errs() &lt;&lt; "'bar' debug type\n"));
-DEBUG_WITH_TYPE("", errs() &lt;&lt; "No debug type (2)\n");
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="Statistic">The <tt>Statistic</tt> class &amp; <tt>-stats</tt>
-  option</a>
-</h3>
-
-<div>
-
-<p>The "<tt><a
-href="/doxygen/Statistic_8h-source.html">llvm/ADT/Statistic.h</a></tt>" file
-provides a class named <tt>Statistic</tt> that is used as a unified way to
-keep track of what the LLVM compiler is doing and how effective various
-optimizations are.  It is useful to see what optimizations are contributing to
-making a particular program run faster.</p>
-
-<p>Often you may run your pass on some big program, and you're interested to see
-how many times it makes a certain transformation.  Although you can do this with
-hand inspection, or some ad-hoc method, this is a real pain and not very useful
-for big programs.  Using the <tt>Statistic</tt> class makes it very easy to
-keep track of this information, and the calculated information is presented in a
-uniform manner with the rest of the passes being executed.</p>
-
-<p>There are many examples of <tt>Statistic</tt> uses, but the basics of using
-it are as follows:</p>
-
-<ol>
-    <li><p>Define your statistic like this:</p>
-
-<div class="doc_code">
-<pre>
-#define <a href="#DEBUG_TYPE">DEBUG_TYPE</a> "mypassname"   <i>// This goes before any #includes.</i>
-STATISTIC(NumXForms, "The # of times I did stuff");
-</pre>
-</div>
-
-  <p>The <tt>STATISTIC</tt> macro defines a static variable, whose name is
-    specified by the first argument.  The pass name is taken from the DEBUG_TYPE
-    macro, and the description is taken from the second argument.  The variable
-    defined ("NumXForms" in this case) acts like an unsigned integer.</p></li>
-
-    <li><p>Whenever you make a transformation, bump the counter:</p>
-
-<div class="doc_code">
-<pre>
-++NumXForms;   // <i>I did stuff!</i>
-</pre>
-</div>
-
-    </li>
-  </ol>
-
-  <p>That's all you have to do.  To get '<tt>opt</tt>' to print out the
-  statistics gathered, use the '<tt>-stats</tt>' option:</p>
-
-<div class="doc_code">
-<pre>
-$ opt -stats -mypassname &lt; program.bc &gt; /dev/null
-<i>... statistics output ...</i>
-</pre>
-</div>
-
-  <p> When running <tt>opt</tt> on a C file from the SPEC benchmark
-suite, it gives a report that looks like this:</p>
-
-<div class="doc_code">
-<pre>
-   7646 bitcodewriter   - Number of normal instructions
-    725 bitcodewriter   - Number of oversized instructions
- 129996 bitcodewriter   - Number of bitcode bytes written
-   2817 raise           - Number of insts DCEd or constprop'd
-   3213 raise           - Number of cast-of-self removed
-   5046 raise           - Number of expression trees converted
-     75 raise           - Number of other getelementptr's formed
-    138 raise           - Number of load/store peepholes
-     42 deadtypeelim    - Number of unused typenames removed from symtab
-    392 funcresolve     - Number of varargs functions resolved
-     27 globaldce       - Number of global variables removed
-      2 adce            - Number of basic blocks removed
-    134 cee             - Number of branches revectored
-     49 cee             - Number of setcc instruction eliminated
-    532 gcse            - Number of loads removed
-   2919 gcse            - Number of instructions removed
-     86 indvars         - Number of canonical indvars added
-     87 indvars         - Number of aux indvars removed
-     25 instcombine     - Number of dead inst eliminate
-    434 instcombine     - Number of insts combined
-    248 licm            - Number of load insts hoisted
-   1298 licm            - Number of insts hoisted to a loop pre-header
-      3 licm            - Number of insts hoisted to multiple loop preds (bad, no loop pre-header)
-     75 mem2reg         - Number of alloca's promoted
-   1444 cfgsimplify     - Number of blocks simplified
-</pre>
-</div>
-
-<p>Obviously, with so many optimizations, having a unified framework for this
-stuff is very nice.  Making your pass fit well into the framework makes it more
-maintainable and useful.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="ViewGraph">Viewing graphs while debugging code</a>
-</h3>
-
-<div>
-
-<p>Several of the important data structures in LLVM are graphs: for example
-CFGs made out of LLVM <a href="#BasicBlock">BasicBlock</a>s, CFGs made out of
-LLVM <a href="CodeGenerator.html#machinebasicblock">MachineBasicBlock</a>s, and
-<a href="CodeGenerator.html#selectiondag_intro">Instruction Selection
-DAGs</a>.  In many cases, while debugging various parts of the compiler, it is
-nice to instantly visualize these graphs.</p>
-
-<p>LLVM provides several callbacks that are available in a debug build to do
-exactly that.  If you call the <tt>Function::viewCFG()</tt> method, for example,
-the current LLVM tool will pop up a window containing the CFG for the function
-where each basic block is a node in the graph, and each node contains the
-instructions in the block.  Similarly, there also exists 
-<tt>Function::viewCFGOnly()</tt> (does not include the instructions), the
-<tt>MachineFunction::viewCFG()</tt> and <tt>MachineFunction::viewCFGOnly()</tt>,
-and the <tt>SelectionDAG::viewGraph()</tt> methods.  Within GDB, for example,
-you can usually use something like <tt>call DAG.viewGraph()</tt> to pop
-up a window.  Alternatively, you can sprinkle calls to these functions in your
-code in places you want to debug.</p>
-
-<p>Getting this to work requires a small amount of configuration.  On Unix
-systems with X11, install the <a href="http://www.graphviz.org">graphviz</a>
-toolkit, and make sure 'dot' and 'gv' are in your path.  If you are running on
-Mac OS/X, download and install the Mac OS/X <a 
-href="http://www.pixelglow.com/graphviz/">Graphviz program</a>, and add
-<tt>/Applications/Graphviz.app/Contents/MacOS/</tt> (or wherever you install
-it) to your path.  Once in your system and path are set up, rerun the LLVM
-configure script and rebuild LLVM to enable this functionality.</p>
-
-<p><tt>SelectionDAG</tt> has been extended to make it easier to locate
-<i>interesting</i> nodes in large complex graphs.  From gdb, if you
-<tt>call DAG.setGraphColor(<i>node</i>, "<i>color</i>")</tt>, then the
-next <tt>call DAG.viewGraph()</tt> would highlight the node in the
-specified color (choices of colors can be found at <a
-href="http://www.graphviz.org/doc/info/colors.html">colors</a>.) More
-complex node attributes can be provided with <tt>call
-DAG.setGraphAttrs(<i>node</i>, "<i>attributes</i>")</tt> (choices can be
-found at <a href="http://www.graphviz.org/doc/info/attrs.html">Graph
-Attributes</a>.)  If you want to restart and clear all the current graph
-attributes, then you can <tt>call DAG.clearGraphAttrs()</tt>. </p>
-
-<p>Note that graph visualization features are compiled out of Release builds
-to reduce file size.  This means that you need a Debug+Asserts or 
-Release+Asserts build to use these features.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="datastructure">Picking the Right Data Structure for a Task</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM has a plethora of data structures in the <tt>llvm/ADT/</tt> directory,
- and we commonly use STL data structures.  This section describes the trade-offs
- you should consider when you pick one.</p>
-
-<p>
-The first step is a choose your own adventure: do you want a sequential
-container, a set-like container, or a map-like container?  The most important
-thing when choosing a container is the algorithmic properties of how you plan to
-access the container.  Based on that, you should use:</p>
-
-<ul>
-<li>a <a href="#ds_map">map-like</a> container if you need efficient look-up
-    of an value based on another value.  Map-like containers also support
-    efficient queries for containment (whether a key is in the map).  Map-like
-    containers generally do not support efficient reverse mapping (values to
-    keys).  If you need that, use two maps.  Some map-like containers also
-    support efficient iteration through the keys in sorted order.  Map-like
-    containers are the most expensive sort, only use them if you need one of
-    these capabilities.</li>
-
-<li>a <a href="#ds_set">set-like</a> container if you need to put a bunch of
-    stuff into a container that automatically eliminates duplicates.  Some
-    set-like containers support efficient iteration through the elements in
-    sorted order.  Set-like containers are more expensive than sequential
-    containers.
-</li>
-
-<li>a <a href="#ds_sequential">sequential</a> container provides
-    the most efficient way to add elements and keeps track of the order they are
-    added to the collection.  They permit duplicates and support efficient
-    iteration, but do not support efficient look-up based on a key.
-</li>
-
-<li>a <a href="#ds_string">string</a> container is a specialized sequential
-    container or reference structure that is used for character or byte
-    arrays.</li>
-
-<li>a <a href="#ds_bit">bit</a> container provides an efficient way to store and
-    perform set operations on sets of numeric id's, while automatically
-    eliminating duplicates.  Bit containers require a maximum of 1 bit for each
-    identifier you want to store.
-</li>
-</ul>
-
-<p>
-Once the proper category of container is determined, you can fine tune the
-memory use, constant factors, and cache behaviors of access by intelligently
-picking a member of the category.  Note that constant factors and cache behavior
-can be a big deal.  If you have a vector that usually only contains a few
-elements (but could contain many), for example, it's much better to use
-<a href="#dss_smallvector">SmallVector</a> than <a href="#dss_vector">vector</a>
-.  Doing so avoids (relatively) expensive malloc/free calls, which dwarf the
-cost of adding the elements to the container. </p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="ds_sequential">Sequential Containers (std::vector, std::list, etc)</a>
-</h3>
-
-<div>
-There are a variety of sequential containers available for you, based on your
-needs.  Pick the first in this section that will do what you want.
-  
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_arrayref">llvm/ADT/ArrayRef.h</a>
-</h4>
-
-<div>
-<p>The llvm::ArrayRef class is the preferred class to use in an interface that
-   accepts a sequential list of elements in memory and just reads from them.  By
-   taking an ArrayRef, the API can be passed a fixed size array, an std::vector,
-   an llvm::SmallVector and anything else that is contiguous in memory.
-</p>
-</div>
-
-
-  
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_fixedarrays">Fixed Size Arrays</a>
-</h4>
-
-<div>
-<p>Fixed size arrays are very simple and very fast.  They are good if you know
-exactly how many elements you have, or you have a (low) upper bound on how many
-you have.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_heaparrays">Heap Allocated Arrays</a>
-</h4>
-
-<div>
-<p>Heap allocated arrays (new[] + delete[]) are also simple.  They are good if
-the number of elements is variable, if you know how many elements you will need
-before the array is allocated, and if the array is usually large (if not,
-consider a <a href="#dss_smallvector">SmallVector</a>).  The cost of a heap
-allocated array is the cost of the new/delete (aka malloc/free).  Also note that
-if you are allocating an array of a type with a constructor, the constructor and
-destructors will be run for every element in the array (re-sizable vectors only
-construct those elements actually used).</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_tinyptrvector">"llvm/ADT/TinyPtrVector.h"</a>
-</h4>
-
-
-<div>
-<p><tt>TinyPtrVector&lt;Type&gt;</tt> is a highly specialized collection class
-that is optimized to avoid allocation in the case when a vector has zero or one
-elements.  It has two major restrictions: 1) it can only hold values of pointer
-type, and 2) it cannot hold a null pointer.</p>
-  
-<p>Since this container is highly specialized, it is rarely used.</p>
-  
-</div>
-    
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_smallvector">"llvm/ADT/SmallVector.h"</a>
-</h4>
-
-<div>
-<p><tt>SmallVector&lt;Type, N&gt;</tt> is a simple class that looks and smells
-just like <tt>vector&lt;Type&gt;</tt>:
-it supports efficient iteration, lays out elements in memory order (so you can
-do pointer arithmetic between elements), supports efficient push_back/pop_back
-operations, supports efficient random access to its elements, etc.</p>
-
-<p>The advantage of SmallVector is that it allocates space for
-some number of elements (N) <b>in the object itself</b>.  Because of this, if
-the SmallVector is dynamically smaller than N, no malloc is performed.  This can
-be a big win in cases where the malloc/free call is far more expensive than the
-code that fiddles around with the elements.</p>
-
-<p>This is good for vectors that are "usually small" (e.g. the number of
-predecessors/successors of a block is usually less than 8).  On the other hand,
-this makes the size of the SmallVector itself large, so you don't want to
-allocate lots of them (doing so will waste a lot of space).  As such,
-SmallVectors are most useful when on the stack.</p>
-
-<p>SmallVector also provides a nice portable and efficient replacement for
-<tt>alloca</tt>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_vector">&lt;vector&gt;</a>
-</h4>
-
-<div>
-<p>
-std::vector is well loved and respected.  It is useful when SmallVector isn't:
-when the size of the vector is often large (thus the small optimization will
-rarely be a benefit) or if you will be allocating many instances of the vector
-itself (which would waste space for elements that aren't in the container).
-vector is also useful when interfacing with code that expects vectors :).
-</p>
-
-<p>One worthwhile note about std::vector: avoid code like this:</p>
-
-<div class="doc_code">
-<pre>
-for ( ... ) {
-   std::vector&lt;foo&gt; V;
-   // make use of V.
-}
-</pre>
-</div>
-
-<p>Instead, write this as:</p>
-
-<div class="doc_code">
-<pre>
-std::vector&lt;foo&gt; V;
-for ( ... ) {
-   // make use of V.
-   V.clear();
-}
-</pre>
-</div>
-
-<p>Doing so will save (at least) one heap allocation and free per iteration of
-the loop.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_deque">&lt;deque&gt;</a>
-</h4>
-
-<div>
-<p>std::deque is, in some senses, a generalized version of std::vector.  Like
-std::vector, it provides constant time random access and other similar
-properties, but it also provides efficient access to the front of the list.  It
-does not guarantee continuity of elements within memory.</p>
-
-<p>In exchange for this extra flexibility, std::deque has significantly higher
-constant factor costs than std::vector.  If possible, use std::vector or
-something cheaper.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_list">&lt;list&gt;</a>
-</h4>
-
-<div>
-<p>std::list is an extremely inefficient class that is rarely useful.
-It performs a heap allocation for every element inserted into it, thus having an
-extremely high constant factor, particularly for small data types.  std::list
-also only supports bidirectional iteration, not random access iteration.</p>
-
-<p>In exchange for this high cost, std::list supports efficient access to both
-ends of the list (like std::deque, but unlike std::vector or SmallVector).  In
-addition, the iterator invalidation characteristics of std::list are stronger
-than that of a vector class: inserting or removing an element into the list does
-not invalidate iterator or pointers to other elements in the list.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_ilist">llvm/ADT/ilist.h</a>
-</h4>
-
-<div>
-<p><tt>ilist&lt;T&gt;</tt> implements an 'intrusive' doubly-linked list.  It is
-intrusive, because it requires the element to store and provide access to the
-prev/next pointers for the list.</p>
-
-<p><tt>ilist</tt> has the same drawbacks as <tt>std::list</tt>, and additionally
-requires an <tt>ilist_traits</tt> implementation for the element type, but it
-provides some novel characteristics.  In particular, it can efficiently store
-polymorphic objects, the traits class is informed when an element is inserted or
-removed from the list, and <tt>ilist</tt>s are guaranteed to support a
-constant-time splice operation.</p>
-
-<p>These properties are exactly what we want for things like
-<tt>Instruction</tt>s and basic blocks, which is why these are implemented with
-<tt>ilist</tt>s.</p>
-
-Related classes of interest are explained in the following subsections:
-    <ul>
-      <li><a href="#dss_ilist_traits">ilist_traits</a></li>
-      <li><a href="#dss_iplist">iplist</a></li>
-      <li><a href="#dss_ilist_node">llvm/ADT/ilist_node.h</a></li>
-      <li><a href="#dss_ilist_sentinel">Sentinels</a></li>
-    </ul>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_packedvector">llvm/ADT/PackedVector.h</a>
-</h4>
-
-<div>
-<p>
-Useful for storing a vector of values using only a few number of bits for each
-value. Apart from the standard operations of a vector-like container, it can
-also perform an 'or' set operation. 
-</p>
-
-<p>For example:</p>
-
-<div class="doc_code">
-<pre>
-enum State {
-    None = 0x0,
-    FirstCondition = 0x1,
-    SecondCondition = 0x2,
-    Both = 0x3
-};
-
-State get() {
-    PackedVector&lt;State, 2&gt; Vec1;
-    Vec1.push_back(FirstCondition);
-
-    PackedVector&lt;State, 2&gt; Vec2;
-    Vec2.push_back(SecondCondition);
-
-    Vec1 |= Vec2;
-    return Vec1[0]; // returns 'Both'.
-}
-</pre>
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_ilist_traits">ilist_traits</a>
-</h4>
-
-<div>
-<p><tt>ilist_traits&lt;T&gt;</tt> is <tt>ilist&lt;T&gt;</tt>'s customization
-mechanism. <tt>iplist&lt;T&gt;</tt> (and consequently <tt>ilist&lt;T&gt;</tt>)
-publicly derive from this traits class.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_iplist">iplist</a>
-</h4>
-
-<div>
-<p><tt>iplist&lt;T&gt;</tt> is <tt>ilist&lt;T&gt;</tt>'s base and as such
-supports a slightly narrower interface. Notably, inserters from
-<tt>T&amp;</tt> are absent.</p>
-
-<p><tt>ilist_traits&lt;T&gt;</tt> is a public base of this class and can be
-used for a wide variety of customizations.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_ilist_node">llvm/ADT/ilist_node.h</a>
-</h4>
-
-<div>
-<p><tt>ilist_node&lt;T&gt;</tt> implements a the forward and backward links
-that are expected by the <tt>ilist&lt;T&gt;</tt> (and analogous containers)
-in the default manner.</p>
-
-<p><tt>ilist_node&lt;T&gt;</tt>s are meant to be embedded in the node type
-<tt>T</tt>, usually <tt>T</tt> publicly derives from
-<tt>ilist_node&lt;T&gt;</tt>.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_ilist_sentinel">Sentinels</a>
-</h4>
-
-<div>
-<p><tt>ilist</tt>s have another specialty that must be considered. To be a good
-citizen in the C++ ecosystem, it needs to support the standard container
-operations, such as <tt>begin</tt> and <tt>end</tt> iterators, etc. Also, the
-<tt>operator--</tt> must work correctly on the <tt>end</tt> iterator in the
-case of non-empty <tt>ilist</tt>s.</p>
-
-<p>The only sensible solution to this problem is to allocate a so-called
-<i>sentinel</i> along with the intrusive list, which serves as the <tt>end</tt>
-iterator, providing the back-link to the last element. However conforming to the
-C++ convention it is illegal to <tt>operator++</tt> beyond the sentinel and it
-also must not be dereferenced.</p>
-
-<p>These constraints allow for some implementation freedom to the <tt>ilist</tt>
-how to allocate and store the sentinel. The corresponding policy is dictated
-by <tt>ilist_traits&lt;T&gt;</tt>. By default a <tt>T</tt> gets heap-allocated
-whenever the need for a sentinel arises.</p>
-
-<p>While the default policy is sufficient in most cases, it may break down when
-<tt>T</tt> does not provide a default constructor. Also, in the case of many
-instances of <tt>ilist</tt>s, the memory overhead of the associated sentinels
-is wasted. To alleviate the situation with numerous and voluminous
-<tt>T</tt>-sentinels, sometimes a trick is employed, leading to <i>ghostly
-sentinels</i>.</p>
-
-<p>Ghostly sentinels are obtained by specially-crafted <tt>ilist_traits&lt;T&gt;</tt>
-which superpose the sentinel with the <tt>ilist</tt> instance in memory. Pointer
-arithmetic is used to obtain the sentinel, which is relative to the
-<tt>ilist</tt>'s <tt>this</tt> pointer. The <tt>ilist</tt> is augmented by an
-extra pointer, which serves as the back-link of the sentinel. This is the only
-field in the ghostly sentinel which can be legally accessed.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_other">Other Sequential Container options</a>
-</h4>
-
-<div>
-<p>Other STL containers are available, such as std::string.</p>
-
-<p>There are also various STL adapter classes such as std::queue,
-std::priority_queue, std::stack, etc.  These provide simplified access to an
-underlying container but don't affect the cost of the container itself.</p>
-
-</div>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="ds_string">String-like containers</a>
-</h3>
-
-<div>
-
-<p>
-There are a variety of ways to pass around and use strings in C and C++, and
-LLVM adds a few new options to choose from.  Pick the first option on this list
-that will do what you need, they are ordered according to their relative cost.
-</p>
-<p>
-Note that is is generally preferred to <em>not</em> pass strings around as 
-"<tt>const char*</tt>"'s.  These have a number of problems, including the fact
-that they cannot represent embedded nul ("\0") characters, and do not have a
-length available efficiently.  The general replacement for '<tt>const 
-char*</tt>' is StringRef.
-</p>
-  
-<p>For more information on choosing string containers for APIs, please see
-<a href="#string_apis">Passing strings</a>.</p>
-  
-  
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_stringref">llvm/ADT/StringRef.h</a>
-</h4>
-
-<div>
-<p>
-The StringRef class is a simple value class that contains a pointer to a
-character and a length, and is quite related to the <a 
-href="#dss_arrayref">ArrayRef</a> class (but specialized for arrays of
-characters).  Because StringRef carries a length with it, it safely handles
-strings with embedded nul characters in it, getting the length does not require
-a strlen call, and it even has very convenient APIs for slicing and dicing the
-character range that it represents.
-</p>
-  
-<p>
-StringRef is ideal for passing simple strings around that are known to be live,
-either because they are C string literals, std::string, a C array, or a
-SmallVector.  Each of these cases has an efficient implicit conversion to
-StringRef, which doesn't result in a dynamic strlen being executed.
-</p>
-  
-<p>StringRef has a few major limitations which make more powerful string
-containers useful:</p>
-  
-<ol>
-<li>You cannot directly convert a StringRef to a 'const char*' because there is
-no way to add a trailing nul (unlike the .c_str() method on various stronger
-classes).</li>
-
-  
-<li>StringRef doesn't own or keep alive the underlying string bytes.
-As such it can easily lead to dangling pointers, and is not suitable for
-embedding in datastructures in most cases (instead, use an std::string or
-something like that).</li>
-  
-<li>For the same reason, StringRef cannot be used as the return value of a
-method if the method "computes" the result string.  Instead, use
-std::string.</li>
-    
-<li>StringRef's do not allow you to mutate the pointed-to string bytes and it
-doesn't allow you to insert or remove bytes from the range.  For editing 
-operations like this, it interoperates with the <a 
-href="#dss_twine">Twine</a> class.</li>
-</ol>
-  
-<p>Because of its strengths and limitations, it is very common for a function to
-take a StringRef and for a method on an object to return a StringRef that
-points into some string that it owns.</p>
-  
-</div>
-  
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_twine">llvm/ADT/Twine.h</a>
-</h4>
-
-<div>
-  <p>
-  The Twine class is used as an intermediary datatype for APIs that want to take
-  a string that can be constructed inline with a series of concatenations.
-  Twine works by forming recursive instances of the Twine datatype (a simple
-  value object) on the stack as temporary objects, linking them together into a
-  tree which is then linearized when the Twine is consumed.  Twine is only safe
-  to use as the argument to a function, and should always be a const reference,
-  e.g.:
-  </p>
-  
-  <pre>
-    void foo(const Twine &amp;T);
-    ...
-    StringRef X = ...
-    unsigned i = ...
-    foo(X + "." + Twine(i));
-  </pre>
-  
-  <p>This example forms a string like "blarg.42" by concatenating the values
-  together, and does not form intermediate strings containing "blarg" or
-  "blarg.".
-  </p>
-  
-  <p>Because Twine is constructed with temporary objects on the stack, and
-  because these instances are destroyed at the end of the current statement,
-  it is an inherently dangerous API.  For example, this simple variant contains
-  undefined behavior and will probably crash:</p>
-  
-  <pre>
-    void foo(const Twine &amp;T);
-    ...
-    StringRef X = ...
-    unsigned i = ...
-    const Twine &amp;Tmp = X + "." + Twine(i);
-    foo(Tmp);
-  </pre>
-
-  <p>... because the temporaries are destroyed before the call.  That said,
-  Twine's are much more efficient than intermediate std::string temporaries, and
-  they work really well with StringRef.  Just be aware of their limitations.</p>
-  
-</div>
-
-  
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_smallstring">llvm/ADT/SmallString.h</a>
-</h4>
-
-<div>
-  
-<p>SmallString is a subclass of <a href="#dss_smallvector">SmallVector</a> that
-adds some convenience APIs like += that takes StringRef's.  SmallString avoids
-allocating memory in the case when the preallocated space is enough to hold its
-data, and it calls back to general heap allocation when required.  Since it owns
-its data, it is very safe to use and supports full mutation of the string.</p>
-  
-<p>Like SmallVector's, the big downside to SmallString is their sizeof.  While
-they are optimized for small strings, they themselves are not particularly
-small.  This means that they work great for temporary scratch buffers on the
-stack, but should not generally be put into the heap: it is very rare to 
-see a SmallString as the member of a frequently-allocated heap data structure
-or returned by-value.
-</p>
-
-</div>
-  
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_stdstring">std::string</a>
-</h4>
-
-<div>
-  
-  <p>The standard C++ std::string class is a very general class that (like
-  SmallString) owns its underlying data.  sizeof(std::string) is very reasonable
-  so it can be embedded into heap data structures and returned by-value.
-  On the other hand, std::string is highly inefficient for inline editing (e.g.
-  concatenating a bunch of stuff together) and because it is provided by the
-  standard library, its performance characteristics depend a lot of the host
-  standard library (e.g. libc++ and MSVC provide a highly optimized string
-  class, GCC contains a really slow implementation).
-  </p>
-
-  <p>The major disadvantage of std::string is that almost every operation that
-  makes them larger can allocate memory, which is slow.  As such, it is better
-  to use SmallVector or Twine as a scratch buffer, but then use std::string to
-  persist the result.</p>
-
-  
-</div>
-  
-<!-- end of strings -->
-</div>
-
-  
-<!-- ======================================================================= -->
-<h3>
-  <a name="ds_set">Set-Like Containers (std::set, SmallSet, SetVector, etc)</a>
-</h3>
-
-<div>
-
-<p>Set-like containers are useful when you need to canonicalize multiple values
-into a single representation.  There are several different choices for how to do
-this, providing various trade-offs.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_sortedvectorset">A sorted 'vector'</a>
-</h4>
-
-<div>
-
-<p>If you intend to insert a lot of elements, then do a lot of queries, a
-great approach is to use a vector (or other sequential container) with
-std::sort+std::unique to remove duplicates.  This approach works really well if
-your usage pattern has these two distinct phases (insert then query), and can be
-coupled with a good choice of <a href="#ds_sequential">sequential container</a>.
-</p>
-
-<p>
-This combination provides the several nice properties: the result data is
-contiguous in memory (good for cache locality), has few allocations, is easy to
-address (iterators in the final vector are just indices or pointers), and can be
-efficiently queried with a standard binary or radix search.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_smallset">"llvm/ADT/SmallSet.h"</a>
-</h4>
-
-<div>
-
-<p>If you have a set-like data structure that is usually small and whose elements
-are reasonably small, a <tt>SmallSet&lt;Type, N&gt;</tt> is a good choice.  This set
-has space for N elements in place (thus, if the set is dynamically smaller than
-N, no malloc traffic is required) and accesses them with a simple linear search.
-When the set grows beyond 'N' elements, it allocates a more expensive representation that
-guarantees efficient access (for most types, it falls back to std::set, but for
-pointers it uses something far better, <a
-href="#dss_smallptrset">SmallPtrSet</a>).</p>
-
-<p>The magic of this class is that it handles small sets extremely efficiently,
-but gracefully handles extremely large sets without loss of efficiency.  The
-drawback is that the interface is quite small: it supports insertion, queries
-and erasing, but does not support iteration.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_smallptrset">"llvm/ADT/SmallPtrSet.h"</a>
-</h4>
-
-<div>
-
-<p>SmallPtrSet has all the advantages of <tt>SmallSet</tt> (and a <tt>SmallSet</tt> of pointers is 
-transparently implemented with a <tt>SmallPtrSet</tt>), but also supports iterators.  If
-more than 'N' insertions are performed, a single quadratically
-probed hash table is allocated and grows as needed, providing extremely
-efficient access (constant time insertion/deleting/queries with low constant
-factors) and is very stingy with malloc traffic.</p>
-
-<p>Note that, unlike <tt>std::set</tt>, the iterators of <tt>SmallPtrSet</tt> are invalidated
-whenever an insertion occurs.  Also, the values visited by the iterators are not
-visited in sorted order.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_denseset">"llvm/ADT/DenseSet.h"</a>
-</h4>
-
-<div>
-
-<p>
-DenseSet is a simple quadratically probed hash table.  It excels at supporting
-small values: it uses a single allocation to hold all of the pairs that
-are currently inserted in the set.  DenseSet is a great way to unique small
-values that are not simple pointers (use <a 
-href="#dss_smallptrset">SmallPtrSet</a> for pointers).  Note that DenseSet has
-the same requirements for the value type that <a 
-href="#dss_densemap">DenseMap</a> has.
-</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_sparseset">"llvm/ADT/SparseSet.h"</a>
-</h4>
-
-<div>
-
-<p>SparseSet holds a small number of objects identified by unsigned keys of
-moderate size. It uses a lot of memory, but provides operations that are
-almost as fast as a vector. Typical keys are physical registers, virtual
-registers, or numbered basic blocks.</p>
-
-<p>SparseSet is useful for algorithms that need very fast clear/find/insert/erase
-and fast iteration over small sets.  It is not intended for building composite
-data structures.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_FoldingSet">"llvm/ADT/FoldingSet.h"</a>
-</h4>
-
-<div>
-
-<p>
-FoldingSet is an aggregate class that is really good at uniquing
-expensive-to-create or polymorphic objects.  It is a combination of a chained
-hash table with intrusive links (uniqued objects are required to inherit from
-FoldingSetNode) that uses <a href="#dss_smallvector">SmallVector</a> as part of
-its ID process.</p>
-
-<p>Consider a case where you want to implement a "getOrCreateFoo" method for
-a complex object (for example, a node in the code generator).  The client has a
-description of *what* it wants to generate (it knows the opcode and all the
-operands), but we don't want to 'new' a node, then try inserting it into a set
-only to find out it already exists, at which point we would have to delete it
-and return the node that already exists.
-</p>
-
-<p>To support this style of client, FoldingSet perform a query with a
-FoldingSetNodeID (which wraps SmallVector) that can be used to describe the
-element that we want to query for.  The query either returns the element
-matching the ID or it returns an opaque ID that indicates where insertion should
-take place.  Construction of the ID usually does not require heap traffic.</p>
-
-<p>Because FoldingSet uses intrusive links, it can support polymorphic objects
-in the set (for example, you can have SDNode instances mixed with LoadSDNodes).
-Because the elements are individually allocated, pointers to the elements are
-stable: inserting or removing elements does not invalidate any pointers to other
-elements.
-</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_set">&lt;set&gt;</a>
-</h4>
-
-<div>
-
-<p><tt>std::set</tt> is a reasonable all-around set class, which is decent at
-many things but great at nothing.  std::set allocates memory for each element
-inserted (thus it is very malloc intensive) and typically stores three pointers
-per element in the set (thus adding a large amount of per-element space
-overhead).  It offers guaranteed log(n) performance, which is not particularly
-fast from a complexity standpoint (particularly if the elements of the set are
-expensive to compare, like strings), and has extremely high constant factors for
-lookup, insertion and removal.</p>
-
-<p>The advantages of std::set are that its iterators are stable (deleting or
-inserting an element from the set does not affect iterators or pointers to other
-elements) and that iteration over the set is guaranteed to be in sorted order.
-If the elements in the set are large, then the relative overhead of the pointers
-and malloc traffic is not a big deal, but if the elements of the set are small,
-std::set is almost never a good choice.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_setvector">"llvm/ADT/SetVector.h"</a>
-</h4>
-
-<div>
-<p>LLVM's SetVector&lt;Type&gt; is an adapter class that combines your choice of
-a set-like container along with a <a href="#ds_sequential">Sequential 
-Container</a>.  The important property
-that this provides is efficient insertion with uniquing (duplicate elements are
-ignored) with iteration support.  It implements this by inserting elements into
-both a set-like container and the sequential container, using the set-like
-container for uniquing and the sequential container for iteration.
-</p>
-
-<p>The difference between SetVector and other sets is that the order of
-iteration is guaranteed to match the order of insertion into the SetVector.
-This property is really important for things like sets of pointers.  Because
-pointer values are non-deterministic (e.g. vary across runs of the program on
-different machines), iterating over the pointers in the set will
-not be in a well-defined order.</p>
-
-<p>
-The drawback of SetVector is that it requires twice as much space as a normal
-set and has the sum of constant factors from the set-like container and the 
-sequential container that it uses.  Use it *only* if you need to iterate over 
-the elements in a deterministic order.  SetVector is also expensive to delete
-elements out of (linear time), unless you use it's "pop_back" method, which is
-faster.
-</p>
-
-<p><tt>SetVector</tt> is an adapter class that defaults to
-   using <tt>std::vector</tt> and a size 16 <tt>SmallSet</tt> for the underlying
-   containers, so it is quite expensive. However,
-   <tt>"llvm/ADT/SetVector.h"</tt> also provides a <tt>SmallSetVector</tt>
-   class, which defaults to using a <tt>SmallVector</tt> and <tt>SmallSet</tt>
-   of a specified size. If you use this, and if your sets are dynamically
-   smaller than <tt>N</tt>, you will save a lot of heap traffic.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_uniquevector">"llvm/ADT/UniqueVector.h"</a>
-</h4>
-
-<div>
-
-<p>
-UniqueVector is similar to <a href="#dss_setvector">SetVector</a>, but it
-retains a unique ID for each element inserted into the set.  It internally
-contains a map and a vector, and it assigns a unique ID for each value inserted
-into the set.</p>
-
-<p>UniqueVector is very expensive: its cost is the sum of the cost of
-maintaining both the map and vector, it has high complexity, high constant
-factors, and produces a lot of malloc traffic.  It should be avoided.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_immutableset">"llvm/ADT/ImmutableSet.h"</a>
-</h4>
-
-<div>
-
-<p>
-ImmutableSet is an immutable (functional) set implementation based on an AVL
-tree.
-Adding or removing elements is done through a Factory object and results in the
-creation of a new ImmutableSet object.
-If an ImmutableSet already exists with the given contents, then the existing one
-is returned; equality is compared with a FoldingSetNodeID.
-The time and space complexity of add or remove operations is logarithmic in the
-size of the original set.
-
-<p>
-There is no method for returning an element of the set, you can only check for
-membership.
-
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_otherset">Other Set-Like Container Options</a>
-</h4>
-
-<div>
-
-<p>
-The STL provides several other options, such as std::multiset and the various 
-"hash_set" like containers (whether from C++ TR1 or from the SGI library). We
-never use hash_set and unordered_set because they are generally very expensive 
-(each insertion requires a malloc) and very non-portable.
-</p>
-
-<p>std::multiset is useful if you're not interested in elimination of
-duplicates, but has all the drawbacks of std::set.  A sorted vector (where you 
-don't delete duplicate entries) or some other approach is almost always
-better.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="ds_map">Map-Like Containers (std::map, DenseMap, etc)</a>
-</h3>
-
-<div>
-Map-like containers are useful when you want to associate data to a key.  As
-usual, there are a lot of different ways to do this. :)
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_sortedvectormap">A sorted 'vector'</a>
-</h4>
-
-<div>
-
-<p>
-If your usage pattern follows a strict insert-then-query approach, you can
-trivially use the same approach as <a href="#dss_sortedvectorset">sorted vectors
-for set-like containers</a>.  The only difference is that your query function
-(which uses std::lower_bound to get efficient log(n) lookup) should only compare
-the key, not both the key and value.  This yields the same advantages as sorted
-vectors for sets.
-</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_stringmap">"llvm/ADT/StringMap.h"</a>
-</h4>
-
-<div>
-
-<p>
-Strings are commonly used as keys in maps, and they are difficult to support
-efficiently: they are variable length, inefficient to hash and compare when
-long, expensive to copy, etc.  StringMap is a specialized container designed to
-cope with these issues.  It supports mapping an arbitrary range of bytes to an
-arbitrary other object.</p>
-
-<p>The StringMap implementation uses a quadratically-probed hash table, where
-the buckets store a pointer to the heap allocated entries (and some other
-stuff).  The entries in the map must be heap allocated because the strings are
-variable length.  The string data (key) and the element object (value) are
-stored in the same allocation with the string data immediately after the element
-object.  This container guarantees the "<tt>(char*)(&amp;Value+1)</tt>" points
-to the key string for a value.</p>
-
-<p>The StringMap is very fast for several reasons: quadratic probing is very
-cache efficient for lookups, the hash value of strings in buckets is not
-recomputed when looking up an element, StringMap rarely has to touch the
-memory for unrelated objects when looking up a value (even when hash collisions
-happen), hash table growth does not recompute the hash values for strings
-already in the table, and each pair in the map is store in a single allocation
-(the string data is stored in the same allocation as the Value of a pair).</p>
-
-<p>StringMap also provides query methods that take byte ranges, so it only ever
-copies a string if a value is inserted into the table.</p>
-
-<p>StringMap iteratation order, however, is not guaranteed to be deterministic,
-so any uses which require that should instead use a std::map.</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_indexedmap">"llvm/ADT/IndexedMap.h"</a>
-</h4>
-
-<div>
-<p>
-IndexedMap is a specialized container for mapping small dense integers (or
-values that can be mapped to small dense integers) to some other type.  It is
-internally implemented as a vector with a mapping function that maps the keys to
-the dense integer range.
-</p>
-
-<p>
-This is useful for cases like virtual registers in the LLVM code generator: they
-have a dense mapping that is offset by a compile-time constant (the first
-virtual register ID).</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_densemap">"llvm/ADT/DenseMap.h"</a>
-</h4>
-
-<div>
-
-<p>
-DenseMap is a simple quadratically probed hash table.  It excels at supporting
-small keys and values: it uses a single allocation to hold all of the pairs that
-are currently inserted in the map.  DenseMap is a great way to map pointers to
-pointers, or map other small types to each other.
-</p>
-
-<p>
-There are several aspects of DenseMap that you should be aware of, however.  The
-iterators in a DenseMap are invalidated whenever an insertion occurs, unlike
-map.  Also, because DenseMap allocates space for a large number of key/value
-pairs (it starts with 64 by default), it will waste a lot of space if your keys
-or values are large.  Finally, you must implement a partial specialization of
-DenseMapInfo for the key that you want, if it isn't already supported.  This
-is required to tell DenseMap about two special marker values (which can never be
-inserted into the map) that it needs internally.</p>
-
-<p>
-DenseMap's find_as() method supports lookup operations using an alternate key
-type. This is useful in cases where the normal key type is expensive to
-construct, but cheap to compare against. The DenseMapInfo is responsible for
-defining the appropriate comparison and hashing methods for each alternate
-key type used.
-</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_valuemap">"llvm/ADT/ValueMap.h"</a>
-</h4>
-
-<div>
-
-<p>
-ValueMap is a wrapper around a <a href="#dss_densemap">DenseMap</a> mapping
-Value*s (or subclasses) to another type.  When a Value is deleted or RAUW'ed,
-ValueMap will update itself so the new version of the key is mapped to the same
-value, just as if the key were a WeakVH.  You can configure exactly how this
-happens, and what else happens on these two events, by passing
-a <code>Config</code> parameter to the ValueMap template.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_intervalmap">"llvm/ADT/IntervalMap.h"</a>
-</h4>
-
-<div>
-
-<p> IntervalMap is a compact map for small keys and values. It maps key
-intervals instead of single keys, and it will automatically coalesce adjacent
-intervals. When then map only contains a few intervals, they are stored in the
-map object itself to avoid allocations.</p>
-
-<p> The IntervalMap iterators are quite big, so they should not be passed around
-as STL iterators. The heavyweight iterators allow a smaller data structure.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_map">&lt;map&gt;</a>
-</h4>
-
-<div>
-
-<p>
-std::map has similar characteristics to <a href="#dss_set">std::set</a>: it uses
-a single allocation per pair inserted into the map, it offers log(n) lookup with
-an extremely large constant factor, imposes a space penalty of 3 pointers per
-pair in the map, etc.</p>
-
-<p>std::map is most useful when your keys or values are very large, if you need
-to iterate over the collection in sorted order, or if you need stable iterators
-into the map (i.e. they don't get invalidated if an insertion or deletion of
-another element takes place).</p>
-
-</div>
-
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_mapvector">"llvm/ADT/MapVector.h"</a>
-</h4>
-<div>
-
-<p> MapVector&lt;KeyT,ValueT&gt provides a subset of the DenseMap interface.
-  The main difference is that the iteration order is guaranteed to be
-  the insertion order, making it an easy (but somewhat expensive) solution
-  for non-deterministic iteration over maps of pointers. </p>
-
-<p> It is implemented by mapping from key to an index in a vector of key,value
-  pairs. This provides fast lookup and iteration, but has two main drawbacks:
-  The key is stored twice and it doesn't support removing elements. </p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_inteqclasses">"llvm/ADT/IntEqClasses.h"</a>
-</h4>
-
-<div>
-
-<p>IntEqClasses provides a compact representation of equivalence classes of
-small integers. Initially, each integer in the range 0..n-1 has its own
-equivalence class. Classes can be joined by passing two class representatives to
-the join(a, b) method. Two integers are in the same class when findLeader()
-returns the same representative.</p>
-
-<p>Once all equivalence classes are formed, the map can be compressed so each
-integer 0..n-1 maps to an equivalence class number in the range 0..m-1, where m
-is the total number of equivalence classes. The map must be uncompressed before
-it can be edited again.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_immutablemap">"llvm/ADT/ImmutableMap.h"</a>
-</h4>
-
-<div>
-
-<p>
-ImmutableMap is an immutable (functional) map implementation based on an AVL
-tree.
-Adding or removing elements is done through a Factory object and results in the
-creation of a new ImmutableMap object.
-If an ImmutableMap already exists with the given key set, then the existing one
-is returned; equality is compared with a FoldingSetNodeID.
-The time and space complexity of add or remove operations is logarithmic in the
-size of the original map.
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_othermap">Other Map-Like Container Options</a>
-</h4>
-
-<div>
-
-<p>
-The STL provides several other options, such as std::multimap and the various 
-"hash_map" like containers (whether from C++ TR1 or from the SGI library). We
-never use hash_set and unordered_set because they are generally very expensive 
-(each insertion requires a malloc) and very non-portable.</p>
-
-<p>std::multimap is useful if you want to map a key to multiple values, but has
-all the drawbacks of std::map.  A sorted vector or some other approach is almost
-always better.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="ds_bit">Bit storage containers (BitVector, SparseBitVector)</a>
-</h3>
-
-<div>
-<p>Unlike the other containers, there are only two bit storage containers, and 
-choosing when to use each is relatively straightforward.</p>
-
-<p>One additional option is 
-<tt>std::vector&lt;bool&gt;</tt>: we discourage its use for two reasons 1) the
-implementation in many common compilers (e.g. commonly available versions of 
-GCC) is extremely inefficient and 2) the C++ standards committee is likely to
-deprecate this container and/or change it significantly somehow.  In any case,
-please don't use it.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_bitvector">BitVector</a>
-</h4>
-
-<div>
-<p> The BitVector container provides a dynamic size set of bits for manipulation.
-It supports individual bit setting/testing, as well as set operations.  The set
-operations take time O(size of bitvector), but operations are performed one word
-at a time, instead of one bit at a time.  This makes the BitVector very fast for
-set operations compared to other containers.  Use the BitVector when you expect
-the number of set bits to be high (IE a dense set).
-</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_smallbitvector">SmallBitVector</a>
-</h4>
-
-<div>
-<p> The SmallBitVector container provides the same interface as BitVector, but
-it is optimized for the case where only a small number of bits, less than
-25 or so, are needed. It also transparently supports larger bit counts, but
-slightly less efficiently than a plain BitVector, so SmallBitVector should
-only be used when larger counts are rare.
-</p>
-
-<p>
-At this time, SmallBitVector does not support set operations (and, or, xor),
-and its operator[] does not provide an assignable lvalue.
-</p>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="dss_sparsebitvector">SparseBitVector</a>
-</h4>
-
-<div>
-<p> The SparseBitVector container is much like BitVector, with one major
-difference: Only the bits that are set, are stored.  This makes the
-SparseBitVector much more space efficient than BitVector when the set is sparse,
-as well as making set operations O(number of set bits) instead of O(size of
-universe).  The downside to the SparseBitVector is that setting and testing of random bits is O(N), and on large SparseBitVectors, this can be slower than BitVector. In our implementation, setting or testing bits in sorted order
-(either forwards or reverse) is O(1) worst case.  Testing and setting bits within 128 bits (depends on size) of the current bit is also O(1).  As a general statement, testing/setting bits in a SparseBitVector is O(distance away from last set bit).
-</p>
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="common">Helpful Hints for Common Operations</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This section describes how to perform some very simple transformations of
-LLVM code.  This is meant to give examples of common idioms used, showing the
-practical side of LLVM transformations.  <p> Because this is a "how-to" section,
-you should also read about the main classes that you will be working with.  The
-<a href="#coreclasses">Core LLVM Class Hierarchy Reference</a> contains details
-and descriptions of the main classes that you should know about.</p>
-
-<!-- NOTE: this section should be heavy on example code -->
-<!-- ======================================================================= -->
-<h3>
-  <a name="inspection">Basic Inspection and Traversal Routines</a>
-</h3>
-
-<div>
-
-<p>The LLVM compiler infrastructure have many different data structures that may
-be traversed.  Following the example of the C++ standard template library, the
-techniques used to traverse these various data structures are all basically the
-same.  For a enumerable sequence of values, the <tt>XXXbegin()</tt> function (or
-method) returns an iterator to the start of the sequence, the <tt>XXXend()</tt>
-function returns an iterator pointing to one past the last valid element of the
-sequence, and there is some <tt>XXXiterator</tt> data type that is common
-between the two operations.</p>
-
-<p>Because the pattern for iteration is common across many different aspects of
-the program representation, the standard template library algorithms may be used
-on them, and it is easier to remember how to iterate. First we show a few common
-examples of the data structures that need to be traversed.  Other data
-structures are traversed in very similar ways.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="iterate_function">Iterating over the </a><a
-  href="#BasicBlock"><tt>BasicBlock</tt></a>s in a <a
-  href="#Function"><tt>Function</tt></a>
-</h4>
-
-<div>
-
-<p>It's quite common to have a <tt>Function</tt> instance that you'd like to
-transform in some way; in particular, you'd like to manipulate its
-<tt>BasicBlock</tt>s.  To facilitate this, you'll need to iterate over all of
-the <tt>BasicBlock</tt>s that constitute the <tt>Function</tt>. The following is
-an example that prints the name of a <tt>BasicBlock</tt> and the number of
-<tt>Instruction</tt>s it contains:</p>
-
-<div class="doc_code">
-<pre>
-// <i>func is a pointer to a Function instance</i>
-for (Function::iterator i = func-&gt;begin(), e = func-&gt;end(); i != e; ++i)
-  // <i>Print out the name of the basic block if it has one, and then the</i>
-  // <i>number of instructions that it contains</i>
-  errs() &lt;&lt; "Basic block (name=" &lt;&lt; i-&gt;getName() &lt;&lt; ") has "
-             &lt;&lt; i-&gt;size() &lt;&lt; " instructions.\n";
-</pre>
-</div>
-
-<p>Note that i can be used as if it were a pointer for the purposes of
-invoking member functions of the <tt>Instruction</tt> class.  This is
-because the indirection operator is overloaded for the iterator
-classes.  In the above code, the expression <tt>i-&gt;size()</tt> is
-exactly equivalent to <tt>(*i).size()</tt> just like you'd expect.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="iterate_basicblock">Iterating over the </a><a
-  href="#Instruction"><tt>Instruction</tt></a>s in a <a
-  href="#BasicBlock"><tt>BasicBlock</tt></a>
-</h4>
-
-<div>
-
-<p>Just like when dealing with <tt>BasicBlock</tt>s in <tt>Function</tt>s, it's
-easy to iterate over the individual instructions that make up
-<tt>BasicBlock</tt>s. Here's a code snippet that prints out each instruction in
-a <tt>BasicBlock</tt>:</p>
-
-<div class="doc_code">
-<pre>
-// <i>blk is a pointer to a BasicBlock instance</i>
-for (BasicBlock::iterator i = blk-&gt;begin(), e = blk-&gt;end(); i != e; ++i)
-   // <i>The next statement works since operator&lt;&lt;(ostream&amp;,...)</i>
-   // <i>is overloaded for Instruction&amp;</i>
-   errs() &lt;&lt; *i &lt;&lt; "\n";
-</pre>
-</div>
-
-<p>However, this isn't really the best way to print out the contents of a
-<tt>BasicBlock</tt>!  Since the ostream operators are overloaded for virtually
-anything you'll care about, you could have just invoked the print routine on the
-basic block itself: <tt>errs() &lt;&lt; *blk &lt;&lt; "\n";</tt>.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="iterate_institer">Iterating over the </a><a
-  href="#Instruction"><tt>Instruction</tt></a>s in a <a
-  href="#Function"><tt>Function</tt></a>
-</h4>
-
-<div>
-
-<p>If you're finding that you commonly iterate over a <tt>Function</tt>'s
-<tt>BasicBlock</tt>s and then that <tt>BasicBlock</tt>'s <tt>Instruction</tt>s,
-<tt>InstIterator</tt> should be used instead. You'll need to include <a
-href="/doxygen/InstIterator_8h-source.html"><tt>llvm/Support/InstIterator.h</tt></a>,
-and then instantiate <tt>InstIterator</tt>s explicitly in your code.  Here's a
-small example that shows how to dump all instructions in a function to the standard error stream:<p>
-
-<div class="doc_code">
-<pre>
-#include "<a href="/doxygen/InstIterator_8h-source.html">llvm/Support/InstIterator.h</a>"
-
-// <i>F is a pointer to a Function instance</i>
-for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
-  errs() &lt;&lt; *I &lt;&lt; "\n";
-</pre>
-</div>
-
-<p>Easy, isn't it?  You can also use <tt>InstIterator</tt>s to fill a
-work list with its initial contents.  For example, if you wanted to
-initialize a work list to contain all instructions in a <tt>Function</tt>
-F, all you would need to do is something like:</p>
-
-<div class="doc_code">
-<pre>
-std::set&lt;Instruction*&gt; worklist;
-// or better yet, SmallPtrSet&lt;Instruction*, 64&gt; worklist;
-
-for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
-   worklist.insert(&amp;*I);
-</pre>
-</div>
-
-<p>The STL set <tt>worklist</tt> would now contain all instructions in the
-<tt>Function</tt> pointed to by F.</p>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="iterate_convert">Turning an iterator into a class pointer (and
-  vice-versa)</a>
-</h4>
-
-<div>
-
-<p>Sometimes, it'll be useful to grab a reference (or pointer) to a class
-instance when all you've got at hand is an iterator.  Well, extracting
-a reference or a pointer from an iterator is very straight-forward.
-Assuming that <tt>i</tt> is a <tt>BasicBlock::iterator</tt> and <tt>j</tt>
-is a <tt>BasicBlock::const_iterator</tt>:</p>
-
-<div class="doc_code">
-<pre>
-Instruction&amp; inst = *i;   // <i>Grab reference to instruction reference</i>
-Instruction* pinst = &amp;*i; // <i>Grab pointer to instruction reference</i>
-const Instruction&amp; inst = *j;
-</pre>
-</div>
-
-<p>However, the iterators you'll be working with in the LLVM framework are
-special: they will automatically convert to a ptr-to-instance type whenever they
-need to.  Instead of dereferencing the iterator and then taking the address of
-the result, you can simply assign the iterator to the proper pointer type and
-you get the dereference and address-of operation as a result of the assignment
-(behind the scenes, this is a result of overloading casting mechanisms).  Thus
-the last line of the last example,</p>
-
-<div class="doc_code">
-<pre>
-Instruction *pinst = &amp;*i;
-</pre>
-</div>
-
-<p>is semantically equivalent to</p>
-
-<div class="doc_code">
-<pre>
-Instruction *pinst = i;
-</pre>
-</div>
-
-<p>It's also possible to turn a class pointer into the corresponding iterator,
-and this is a constant time operation (very efficient).  The following code
-snippet illustrates use of the conversion constructors provided by LLVM
-iterators.  By using these, you can explicitly grab the iterator of something
-without actually obtaining it via iteration over some structure:</p>
-
-<div class="doc_code">
-<pre>
-void printNextInstruction(Instruction* inst) {
-  BasicBlock::iterator it(inst);
-  ++it; // <i>After this line, it refers to the instruction after *inst</i>
-  if (it != inst-&gt;getParent()-&gt;end()) errs() &lt;&lt; *it &lt;&lt; "\n";
-}
-</pre>
-</div>
-
-<p>Unfortunately, these implicit conversions come at a cost; they prevent
-these iterators from conforming to standard iterator conventions, and thus
-from being usable with standard algorithms and containers. For example, they
-prevent the following code, where <tt>B</tt> is a <tt>BasicBlock</tt>,
-from compiling:</p>
-
-<div class="doc_code">
-<pre>
-  llvm::SmallVector&lt;llvm::Instruction *, 16&gt;(B-&gt;begin(), B-&gt;end());
-</pre>
-</div>
-
-<p>Because of this, these implicit conversions may be removed some day,
-and <tt>operator*</tt> changed to return a pointer instead of a reference.</p>
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
-  <a name="iterate_complex">Finding call sites: a slightly more complex
-  example</a>
-</h4>
-
-<div>
-
-<p>Say that you're writing a FunctionPass and would like to count all the
-locations in the entire module (that is, across every <tt>Function</tt>) where a
-certain function (i.e., some <tt>Function</tt>*) is already in scope.  As you'll
-learn later, you may want to use an <tt>InstVisitor</tt> to accomplish this in a
-much more straight-forward manner, but this example will allow us to explore how
-you'd do it if you didn't have <tt>InstVisitor</tt> around. In pseudo-code, this
-is what we want to do:</p>
-
-<div class="doc_code">
-<pre>
-initialize callCounter to zero
-for each Function f in the Module
-  for each BasicBlock b in f
-    for each Instruction i in b
-      if (i is a CallInst and calls the given function)
-        increment callCounter
-</pre>
-</div>
-
-<p>And the actual code is (remember, because we're writing a
-<tt>FunctionPass</tt>, our <tt>FunctionPass</tt>-derived class simply has to
-override the <tt>runOnFunction</tt> method):</p>
-
-<div class="doc_code">
-<pre>
-Function* targetFunc = ...;
-
-class OurFunctionPass : public FunctionPass {
-  public:
-    OurFunctionPass(): callCounter(0) { }
-
-    virtual runOnFunction(Function&amp; F) {
-      for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) {
-        for (BasicBlock::iterator i = b-&gt;begin(), ie = b-&gt;end(); i != ie; ++i) {
-          if (<a href="#CallInst">CallInst</a>* callInst = <a href="#isa">dyn_cast</a>&lt;<a
- href="#CallInst">CallInst</a>&gt;(&amp;*i)) {
-            // <i>We know we've encountered a call instruction, so we</i>
-            // <i>need to determine if it's a call to the</i>
-            // <i>function pointed to by m_func or not.</i>
-            if (callInst-&gt;getCalledFunction() == targetFunc)
-              ++callCounter;
-          }
-        }
-      }
-    }
-
-  private:
-    unsigned callCounter;
-};
-</pre>
-</div>
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
-  <a name="calls_and_invokes">Treating calls and invokes the same way</a>
-</h4>
-
-<div>
-
-<p>You may have noticed that the previous example was a bit oversimplified in
-that it did not deal with call sites generated by 'invoke' instructions. In
-this, and in other situations, you may find that you want to treat
-<tt>CallInst</tt>s and <tt>InvokeInst</tt>s the same way, even though their
-most-specific common base class is <tt>Instruction</tt>, which includes lots of
-less closely-related things. For these cases, LLVM provides a handy wrapper
-class called <a
-href="http://llvm.org/doxygen/classllvm_1_1CallSite.html"><tt>CallSite</tt></a>.
-It is essentially a wrapper around an <tt>Instruction</tt> pointer, with some
-methods that provide functionality common to <tt>CallInst</tt>s and
-<tt>InvokeInst</tt>s.</p>
-
-<p>This class has "value semantics": it should be passed by value, not by
-reference and it should not be dynamically allocated or deallocated using
-<tt>operator new</tt> or <tt>operator delete</tt>. It is efficiently copyable,
-assignable and constructable, with costs equivalents to that of a bare pointer.
-If you look at its definition, it has only a single pointer member.</p>
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
-  <a name="iterate_chains">Iterating over def-use &amp; use-def chains</a>
-</h4>
-
-<div>
-
-<p>Frequently, we might have an instance of the <a
-href="/doxygen/classllvm_1_1Value.html">Value Class</a> and we want to
-determine which <tt>User</tt>s use the <tt>Value</tt>.  The list of all
-<tt>User</tt>s of a particular <tt>Value</tt> is called a <i>def-use</i> chain.
-For example, let's say we have a <tt>Function*</tt> named <tt>F</tt> to a
-particular function <tt>foo</tt>. Finding all of the instructions that
-<i>use</i> <tt>foo</tt> is as simple as iterating over the <i>def-use</i> chain
-of <tt>F</tt>:</p>
-
-<div class="doc_code">
-<pre>
-Function *F = ...;
-
-for (Value::use_iterator i = F-&gt;use_begin(), e = F-&gt;use_end(); i != e; ++i)
-  if (Instruction *Inst = dyn_cast&lt;Instruction&gt;(*i)) {
-    errs() &lt;&lt; "F is used in instruction:\n";
-    errs() &lt;&lt; *Inst &lt;&lt; "\n";
-  }
-</pre>
-</div>
-
-<p>Note that dereferencing a <tt>Value::use_iterator</tt> is not a very cheap
-operation. Instead of performing <tt>*i</tt> above several times, consider
-doing it only once in the loop body and reusing its result.</p>
-
-<p>Alternatively, it's common to have an instance of the <a
-href="/doxygen/classllvm_1_1User.html">User Class</a> and need to know what
-<tt>Value</tt>s are used by it.  The list of all <tt>Value</tt>s used by a
-<tt>User</tt> is known as a <i>use-def</i> chain.  Instances of class
-<tt>Instruction</tt> are common <tt>User</tt>s, so we might want to iterate over
-all of the values that a particular instruction uses (that is, the operands of
-the particular <tt>Instruction</tt>):</p>
-
-<div class="doc_code">
-<pre>
-Instruction *pi = ...;
-
-for (User::op_iterator i = pi-&gt;op_begin(), e = pi-&gt;op_end(); i != e; ++i) {
-  Value *v = *i;
-  // <i>...</i>
-}
-</pre>
-</div>
-
-<p>Declaring objects as <tt>const</tt> is an important tool of enforcing
-mutation free algorithms (such as analyses, etc.). For this purpose above
-iterators come in constant flavors as <tt>Value::const_use_iterator</tt>
-and <tt>Value::const_op_iterator</tt>.  They automatically arise when
-calling <tt>use/op_begin()</tt> on <tt>const Value*</tt>s or
-<tt>const User*</tt>s respectively.  Upon dereferencing, they return
-<tt>const Use*</tt>s. Otherwise the above patterns remain unchanged.</p>
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
-  <a name="iterate_preds">Iterating over predecessors &amp;
-successors of blocks</a>
-</h4>
-
-<div>
-
-<p>Iterating over the predecessors and successors of a block is quite easy
-with the routines defined in <tt>"llvm/Support/CFG.h"</tt>.  Just use code like
-this to iterate over all predecessors of BB:</p>
-
-<div class="doc_code">
-<pre>
-#include "llvm/Support/CFG.h"
-BasicBlock *BB = ...;
-
-for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-  BasicBlock *Pred = *PI;
-  // <i>...</i>
-}
-</pre>
-</div>
-
-<p>Similarly, to iterate over successors use
-succ_iterator/succ_begin/succ_end.</p>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="simplechanges">Making simple changes</a>
-</h3>
-
-<div>
-
-<p>There are some primitive transformation operations present in the LLVM
-infrastructure that are worth knowing about.  When performing
-transformations, it's fairly common to manipulate the contents of basic
-blocks. This section describes some of the common methods for doing so
-and gives example code.</p>
-
-<!--_______________________________________________________________________-->
-<h4>
-  <a name="schanges_creating">Creating and inserting new
-  <tt>Instruction</tt>s</a>
-</h4>
-
-<div>
-
-<p><i>Instantiating Instructions</i></p>
-
-<p>Creation of <tt>Instruction</tt>s is straight-forward: simply call the
-constructor for the kind of instruction to instantiate and provide the necessary
-parameters. For example, an <tt>AllocaInst</tt> only <i>requires</i> a
-(const-ptr-to) <tt>Type</tt>. Thus:</p> 
-
-<div class="doc_code">
-<pre>
-AllocaInst* ai = new AllocaInst(Type::Int32Ty);
-</pre>
-</div>
-
-<p>will create an <tt>AllocaInst</tt> instance that represents the allocation of
-one integer in the current stack frame, at run time. Each <tt>Instruction</tt>
-subclass is likely to have varying default parameters which change the semantics
-of the instruction, so refer to the <a
-href="/doxygen/classllvm_1_1Instruction.html">doxygen documentation for the subclass of
-Instruction</a> that you're interested in instantiating.</p>
-
-<p><i>Naming values</i></p>
-
-<p>It is very useful to name the values of instructions when you're able to, as
-this facilitates the debugging of your transformations.  If you end up looking
-at generated LLVM machine code, you definitely want to have logical names
-associated with the results of instructions!  By supplying a value for the
-<tt>Name</tt> (default) parameter of the <tt>Instruction</tt> constructor, you
-associate a logical name with the result of the instruction's execution at
-run time.  For example, say that I'm writing a transformation that dynamically
-allocates space for an integer on the stack, and that integer is going to be
-used as some kind of index by some other code.  To accomplish this, I place an
-<tt>AllocaInst</tt> at the first point in the first <tt>BasicBlock</tt> of some
-<tt>Function</tt>, and I'm intending to use it within the same
-<tt>Function</tt>. I might do:</p>
-
-<div class="doc_code">
-<pre>
-AllocaInst* pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
-</pre>
-</div>
-
-<p>where <tt>indexLoc</tt> is now the logical name of the instruction's
-execution value, which is a pointer to an integer on the run time stack.</p>
-
-<p><i>Inserting instructions</i></p>
-
-<p>There are essentially two ways to insert an <tt>Instruction</tt>
-into an existing sequence of instructions that form a <tt>BasicBlock</tt>:</p>
-
-<ul>
-  <li>Insertion into an explicit instruction list
-
-    <p>Given a <tt>BasicBlock* pb</tt>, an <tt>Instruction* pi</tt> within that
-    <tt>BasicBlock</tt>, and a newly-created instruction we wish to insert
-    before <tt>*pi</tt>, we do the following: </p>
-
-<div class="doc_code">
-<pre>
-BasicBlock *pb = ...;
-Instruction *pi = ...;
-Instruction *newInst = new Instruction(...);
-
-pb-&gt;getInstList().insert(pi, newInst); // <i>Inserts newInst before pi in pb</i>
-</pre>
-</div>
-
-    <p>Appending to the end of a <tt>BasicBlock</tt> is so common that
-    the <tt>Instruction</tt> class and <tt>Instruction</tt>-derived
-    classes provide constructors which take a pointer to a
-    <tt>BasicBlock</tt> to be appended to. For example code that
-    looked like: </p>
-
-<div class="doc_code">
-<pre>
-BasicBlock *pb = ...;
-Instruction *newInst = new Instruction(...);
-
-pb-&gt;getInstList().push_back(newInst); // <i>Appends newInst to pb</i>
-</pre>
-</div>
-
-    <p>becomes: </p>
-
-<div class="doc_code">
-<pre>
-BasicBlock *pb = ...;
-Instruction *newInst = new Instruction(..., pb);
-</pre>
-</div>
-
-    <p>which is much cleaner, especially if you are creating
-    long instruction streams.</p></li>
-
-  <li>Insertion into an implicit instruction list
-
-    <p><tt>Instruction</tt> instances that are already in <tt>BasicBlock</tt>s
-    are implicitly associated with an existing instruction list: the instruction
-    list of the enclosing basic block. Thus, we could have accomplished the same
-    thing as the above code without being given a <tt>BasicBlock</tt> by doing:
-    </p>
-
-<div class="doc_code">
-<pre>
-Instruction *pi = ...;
-Instruction *newInst = new Instruction(...);
-
-pi-&gt;getParent()-&gt;getInstList().insert(pi, newInst);
-</pre>
-</div>
-
-    <p>In fact, this sequence of steps occurs so frequently that the
-    <tt>Instruction</tt> class and <tt>Instruction</tt>-derived classes provide
-    constructors which take (as a default parameter) a pointer to an
-    <tt>Instruction</tt> which the newly-created <tt>Instruction</tt> should
-    precede.  That is, <tt>Instruction</tt> constructors are capable of
-    inserting the newly-created instance into the <tt>BasicBlock</tt> of a
-    provided instruction, immediately before that instruction.  Using an
-    <tt>Instruction</tt> constructor with a <tt>insertBefore</tt> (default)
-    parameter, the above code becomes:</p>
-
-<div class="doc_code">
-<pre>
-Instruction* pi = ...;
-Instruction* newInst = new Instruction(..., pi);
-</pre>
-</div>
-
-    <p>which is much cleaner, especially if you're creating a lot of
-    instructions and adding them to <tt>BasicBlock</tt>s.</p></li>
-</ul>
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
-  <a name="schanges_deleting">Deleting <tt>Instruction</tt>s</a>
-</h4>
-
-<div>
-
-<p>Deleting an instruction from an existing sequence of instructions that form a
-<a href="#BasicBlock"><tt>BasicBlock</tt></a> is very straight-forward: just
-call the instruction's eraseFromParent() method.  For example:</p>
-
-<div class="doc_code">
-<pre>
-<a href="#Instruction">Instruction</a> *I = .. ;
-I-&gt;eraseFromParent();
-</pre>
-</div>
-
-<p>This unlinks the instruction from its containing basic block and deletes 
-it.  If you'd just like to unlink the instruction from its containing basic
-block but not delete it, you can use the <tt>removeFromParent()</tt> method.</p>
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
-  <a name="schanges_replacing">Replacing an <tt>Instruction</tt> with another
-  <tt>Value</tt></a>
-</h4>
-
-<div>
-
-<h5><i>Replacing individual instructions</i></h5>
-
-<p>Including "<a href="/doxygen/BasicBlockUtils_8h-source.html">llvm/Transforms/Utils/BasicBlockUtils.h</a>"
-permits use of two very useful replace functions: <tt>ReplaceInstWithValue</tt>
-and <tt>ReplaceInstWithInst</tt>.</p>
-
-<h5><a name="schanges_deleting">Deleting <tt>Instruction</tt>s</a></h5>
-
-<div>
-<ul>
-  <li><tt>ReplaceInstWithValue</tt>
-
-    <p>This function replaces all uses of a given instruction with a value,
-    and then removes the original instruction. The following example
-    illustrates the replacement of the result of a particular
-    <tt>AllocaInst</tt> that allocates memory for a single integer with a null
-    pointer to an integer.</p>
-
-<div class="doc_code">
-<pre>
-AllocaInst* instToReplace = ...;
-BasicBlock::iterator ii(instToReplace);
-
-ReplaceInstWithValue(instToReplace-&gt;getParent()-&gt;getInstList(), ii,
-                     Constant::getNullValue(PointerType::getUnqual(Type::Int32Ty)));
-</pre></div></li>
-
-  <li><tt>ReplaceInstWithInst</tt> 
-
-    <p>This function replaces a particular instruction with another
-    instruction, inserting the new instruction into the basic block at the
-    location where the old instruction was, and replacing any uses of the old
-    instruction with the new instruction. The following example illustrates
-    the replacement of one <tt>AllocaInst</tt> with another.</p>
-
-<div class="doc_code">
-<pre>
-AllocaInst* instToReplace = ...;
-BasicBlock::iterator ii(instToReplace);
-
-ReplaceInstWithInst(instToReplace-&gt;getParent()-&gt;getInstList(), ii,
-                    new AllocaInst(Type::Int32Ty, 0, "ptrToReplacedInt"));
-</pre></div></li>
-</ul>
-
-</div>
-
-<h5><i>Replacing multiple uses of <tt>User</tt>s and <tt>Value</tt>s</i></h5>
-
-<p>You can use <tt>Value::replaceAllUsesWith</tt> and
-<tt>User::replaceUsesOfWith</tt> to change more than one use at a time.  See the
-doxygen documentation for the <a href="/doxygen/classllvm_1_1Value.html">Value Class</a>
-and <a href="/doxygen/classllvm_1_1User.html">User Class</a>, respectively, for more
-information.</p>
-
-<!-- Value::replaceAllUsesWith User::replaceUsesOfWith Point out:
-include/llvm/Transforms/Utils/ especially BasicBlockUtils.h with:
-ReplaceInstWithValue, ReplaceInstWithInst -->
-
-</div>
-
-<!--_______________________________________________________________________-->
-<h4>
-  <a name="schanges_deletingGV">Deleting <tt>GlobalVariable</tt>s</a>
-</h4>
-
-<div>
-
-<p>Deleting a global variable from a module is just as easy as deleting an 
-Instruction. First, you must have a pointer to the global variable that you wish
- to delete.  You use this pointer to erase it from its parent, the module.
- For example:</p>
-
-<div class="doc_code">
-<pre>
-<a href="#GlobalVariable">GlobalVariable</a> *GV = .. ;
-
-GV-&gt;eraseFromParent();
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="create_types">How to Create Types</a>
-</h3>
-
-<div>
-
-<p>In generating IR, you may need some complex types.  If you know these types
-statically, you can use <tt>TypeBuilder&lt;...&gt;::get()</tt>, defined
-in <tt>llvm/Support/TypeBuilder.h</tt>, to retrieve them.  <tt>TypeBuilder</tt>
-has two forms depending on whether you're building types for cross-compilation
-or native library use.  <tt>TypeBuilder&lt;T, true&gt;</tt> requires
-that <tt>T</tt> be independent of the host environment, meaning that it's built
-out of types from
-the <a href="/doxygen/namespacellvm_1_1types.html"><tt>llvm::types</tt></a>
-namespace and pointers, functions, arrays, etc. built of
-those.  <tt>TypeBuilder&lt;T, false&gt;</tt> additionally allows native C types
-whose size may depend on the host compiler.  For example,</p>
-
-<div class="doc_code">
-<pre>
-FunctionType *ft = TypeBuilder&lt;types::i&lt;8&gt;(types::i&lt;32&gt;*), true&gt;::get();
-</pre>
-</div>
-
-<p>is easier to read and write than the equivalent</p>
-
-<div class="doc_code">
-<pre>
-std::vector&lt;const Type*&gt; params;
-params.push_back(PointerType::getUnqual(Type::Int32Ty));
-FunctionType *ft = FunctionType::get(Type::Int8Ty, params, false);
-</pre>
-</div>
-
-<p>See the <a href="/doxygen/TypeBuilder_8h-source.html#l00001">class
-comment</a> for more details.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="threading">Threads and LLVM</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p>
-This section describes the interaction of the LLVM APIs with multithreading,
-both on the part of client applications, and in the JIT, in the hosted
-application.
-</p>
-
-<p>
-Note that LLVM's support for multithreading is still relatively young.  Up 
-through version 2.5, the execution of threaded hosted applications was
-supported, but not threaded client access to the APIs.  While this use case is
-now supported, clients <em>must</em> adhere to the guidelines specified below to
-ensure proper operation in multithreaded mode.
-</p>
-
-<p>
-Note that, on Unix-like platforms, LLVM requires the presence of GCC's atomic
-intrinsics in order to support threaded operation.  If you need a
-multhreading-capable LLVM on a platform without a suitably modern system
-compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and 
-using the resultant compiler to build a copy of LLVM with multithreading
-support.
-</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="startmultithreaded">Entering and Exiting Multithreaded Mode</a>
-</h3>
-
-<div>
-
-<p>
-In order to properly protect its internal data structures while avoiding 
-excessive locking overhead in the single-threaded case, the LLVM must intialize
-certain data structures necessary to provide guards around its internals.  To do
-so, the client program must invoke <tt>llvm_start_multithreaded()</tt> before
-making any concurrent LLVM API calls.  To subsequently tear down these
-structures, use the <tt>llvm_stop_multithreaded()</tt> call.  You can also use
-the <tt>llvm_is_multithreaded()</tt> call to check the status of multithreaded
-mode.
-</p>
-
-<p>
-Note that both of these calls must be made <em>in isolation</em>.  That is to
-say that no other LLVM API calls may be executing at any time during the 
-execution of <tt>llvm_start_multithreaded()</tt> or <tt>llvm_stop_multithreaded
-</tt>.  It's is the client's responsibility to enforce this isolation.
-</p>
-
-<p>
-The return value of <tt>llvm_start_multithreaded()</tt> indicates the success or
-failure of the initialization.  Failure typically indicates that your copy of
-LLVM was built without multithreading support, typically because GCC atomic
-intrinsics were not found in your system compiler.  In this case, the LLVM API
-will not be safe for concurrent calls.  However, it <em>will</em> be safe for
-hosting threaded applications in the JIT, though <a href="#jitthreading">care
-must be taken</a> to ensure that side exits and the like do not accidentally
-result in concurrent LLVM API calls.
-</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="shutdown">Ending Execution with <tt>llvm_shutdown()</tt></a>
-</h3>
-
-<div>
-<p>
-When you are done using the LLVM APIs, you should call <tt>llvm_shutdown()</tt>
-to deallocate memory used for internal structures.  This will also invoke 
-<tt>llvm_stop_multithreaded()</tt> if LLVM is operating in multithreaded mode.
-As such, <tt>llvm_shutdown()</tt> requires the same isolation guarantees as
-<tt>llvm_stop_multithreaded()</tt>.
-</p>
-
-<p>
-Note that, if you use scope-based shutdown, you can use the
-<tt>llvm_shutdown_obj</tt> class, which calls <tt>llvm_shutdown()</tt> in its
-destructor.
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="managedstatic">Lazy Initialization with <tt>ManagedStatic</tt></a>
-</h3>
-
-<div>
-<p>
-<tt>ManagedStatic</tt> is a utility class in LLVM used to implement static
-initialization of static resources, such as the global type tables.  Before the
-invocation of <tt>llvm_shutdown()</tt>, it implements a simple lazy 
-initialization scheme.  Once <tt>llvm_start_multithreaded()</tt> returns,
-however, it uses double-checked locking to implement thread-safe lazy
-initialization.
-</p>
-
-<p>
-Note that, because no other threads are allowed to issue LLVM API calls before
-<tt>llvm_start_multithreaded()</tt> returns, it is possible to have 
-<tt>ManagedStatic</tt>s of <tt>llvm::sys::Mutex</tt>s.
-</p>
-
-<p>
-The <tt>llvm_acquire_global_lock()</tt> and <tt>llvm_release_global_lock</tt> 
-APIs provide access to the global lock used to implement the double-checked
-locking for lazy initialization.  These should only be used internally to LLVM,
-and only if you know what you're doing!
-</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="llvmcontext">Achieving Isolation with <tt>LLVMContext</tt></a>
-</h3>
-
-<div>
-<p>
-<tt>LLVMContext</tt> is an opaque class in the LLVM API which clients can use
-to operate multiple, isolated instances of LLVM concurrently within the same
-address space.  For instance, in a hypothetical compile-server, the compilation
-of an individual translation unit is conceptually independent from all the 
-others, and it would be desirable to be able to compile incoming translation 
-units concurrently on independent server threads.  Fortunately, 
-<tt>LLVMContext</tt> exists to enable just this kind of scenario!
-</p>
-
-<p>
-Conceptually, <tt>LLVMContext</tt> provides isolation.  Every LLVM entity 
-(<tt>Module</tt>s, <tt>Value</tt>s, <tt>Type</tt>s, <tt>Constant</tt>s, etc.)
-in LLVM's in-memory IR belongs to an <tt>LLVMContext</tt>.  Entities in 
-different contexts <em>cannot</em> interact with each other: <tt>Module</tt>s in
-different contexts cannot be linked together, <tt>Function</tt>s cannot be added
-to <tt>Module</tt>s in different contexts, etc.  What this means is that is is
-safe to compile on multiple threads simultaneously, as long as no two threads
-operate on entities within the same context.
-</p>
-
-<p>
-In practice, very few places in the API require the explicit specification of a
-<tt>LLVMContext</tt>, other than the <tt>Type</tt> creation/lookup APIs.
-Because every <tt>Type</tt> carries a reference to its owning context, most
-other entities can determine what context they belong to by looking at their
-own <tt>Type</tt>.  If you are adding new entities to LLVM IR, please try to
-maintain this interface design.
-</p>
-
-<p>
-For clients that do <em>not</em> require the benefits of isolation, LLVM 
-provides a convenience API <tt>getGlobalContext()</tt>.  This returns a global,
-lazily initialized <tt>LLVMContext</tt> that may be used in situations where
-isolation is not a concern.
-</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="jitthreading">Threads and the JIT</a>
-</h3>
-
-<div>
-<p>
-LLVM's "eager" JIT compiler is safe to use in threaded programs.  Multiple
-threads can call <tt>ExecutionEngine::getPointerToFunction()</tt> or
-<tt>ExecutionEngine::runFunction()</tt> concurrently, and multiple threads can
-run code output by the JIT concurrently.  The user must still ensure that only
-one thread accesses IR in a given <tt>LLVMContext</tt> while another thread
-might be modifying it.  One way to do that is to always hold the JIT lock while
-accessing IR outside the JIT (the JIT <em>modifies</em> the IR by adding
-<tt>CallbackVH</tt>s).  Another way is to only
-call <tt>getPointerToFunction()</tt> from the <tt>LLVMContext</tt>'s thread.
-</p>
-
-<p>When the JIT is configured to compile lazily (using
-<tt>ExecutionEngine::DisableLazyCompilation(false)</tt>), there is currently a
-<a href="http://llvm.org/bugs/show_bug.cgi?id=5184">race condition</a> in
-updating call sites after a function is lazily-jitted.  It's still possible to
-use the lazy JIT in a threaded program if you ensure that only one thread at a
-time can call any particular lazy stub and that the JIT lock guards any IR
-access, but we suggest using only the eager JIT in threaded programs.
-</p>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="advanced">Advanced Topics</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p>
-This section describes some of the advanced or obscure API's that most clients
-do not need to be aware of.  These API's tend manage the inner workings of the
-LLVM system, and only need to be accessed in unusual circumstances.
-</p>
-
-  
-<!-- ======================================================================= -->
-<h3>
-  <a name="SymbolTable">The <tt>ValueSymbolTable</tt> class</a>
-</h3>
-
-<div>
-<p>The <tt><a href="http://llvm.org/doxygen/classllvm_1_1ValueSymbolTable.html">
-ValueSymbolTable</a></tt> class provides a symbol table that the <a
-href="#Function"><tt>Function</tt></a> and <a href="#Module">
-<tt>Module</tt></a> classes use for naming value definitions. The symbol table
-can provide a name for any <a href="#Value"><tt>Value</tt></a>. 
-</p>
-
-<p>Note that the <tt>SymbolTable</tt> class should not be directly accessed 
-by most clients.  It should only be used when iteration over the symbol table 
-names themselves are required, which is very special purpose.  Note that not 
-all LLVM
-<tt><a href="#Value">Value</a></tt>s have names, and those without names (i.e. they have
-an empty name) do not exist in the symbol table.
-</p>
-
-<p>Symbol tables support iteration over the values in the symbol
-table with <tt>begin/end/iterator</tt> and supports querying to see if a
-specific name is in the symbol table (with <tt>lookup</tt>).  The
-<tt>ValueSymbolTable</tt> class exposes no public mutator methods, instead,
-simply call <tt>setName</tt> on a value, which will autoinsert it into the
-appropriate symbol table.</p>
-
-</div>
-
-
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="UserLayout">The <tt>User</tt> and owned <tt>Use</tt> classes' memory layout</a>
-</h3>
-
-<div>
-<p>The <tt><a href="http://llvm.org/doxygen/classllvm_1_1User.html">
-User</a></tt> class provides a basis for expressing the ownership of <tt>User</tt>
-towards other <tt><a href="http://llvm.org/doxygen/classllvm_1_1Value.html">
-Value</a></tt>s. The <tt><a href="http://llvm.org/doxygen/classllvm_1_1Use.html">
-Use</a></tt> helper class is employed to do the bookkeeping and to facilitate <i>O(1)</i>
-addition and removal.</p>
-
-<!-- ______________________________________________________________________ -->
-<h4>
-  <a name="Use2User">
-    Interaction and relationship between <tt>User</tt> and <tt>Use</tt> objects
-  </a>
-</h4>
-
-<div>
-<p>
-A subclass of <tt>User</tt> can choose between incorporating its <tt>Use</tt> objects
-or refer to them out-of-line by means of a pointer. A mixed variant
-(some <tt>Use</tt>s inline others hung off) is impractical and breaks the invariant
-that the <tt>Use</tt> objects belonging to the same <tt>User</tt> form a contiguous array.
-</p>
-
-<p>
-We have 2 different layouts in the <tt>User</tt> (sub)classes:
-<ul>
-<li><p>Layout a)
-The <tt>Use</tt> object(s) are inside (resp. at fixed offset) of the <tt>User</tt>
-object and there are a fixed number of them.</p>
-
-<li><p>Layout b)
-The <tt>Use</tt> object(s) are referenced by a pointer to an
-array from the <tt>User</tt> object and there may be a variable
-number of them.</p>
-</ul>
-<p>
-As of v2.4 each layout still possesses a direct pointer to the
-start of the array of <tt>Use</tt>s. Though not mandatory for layout a),
-we stick to this redundancy for the sake of simplicity.
-The <tt>User</tt> object also stores the number of <tt>Use</tt> objects it
-has. (Theoretically this information can also be calculated
-given the scheme presented below.)</p>
-<p>
-Special forms of allocation operators (<tt>operator new</tt>)
-enforce the following memory layouts:</p>
-
-<ul>
-<li><p>Layout a) is modelled by prepending the <tt>User</tt> object by the <tt>Use[]</tt> array.</p>
-
-<pre>
-...---.---.---.---.-------...
-  | P | P | P | P | User
-'''---'---'---'---'-------'''
-</pre>
-
-<li><p>Layout b) is modelled by pointing at the <tt>Use[]</tt> array.</p>
-<pre>
-.-------...
-| User
-'-------'''
-    |
-    v
-    .---.---.---.---...
-    | P | P | P | P |
-    '---'---'---'---'''
-</pre>
-</ul>
-<i>(In the above figures '<tt>P</tt>' stands for the <tt>Use**</tt> that
-    is stored in each <tt>Use</tt> object in the member <tt>Use::Prev</tt>)</i>
-
-</div>
-
-<!-- ______________________________________________________________________ -->
-<h4>
-  <a name="Waymarking">The waymarking algorithm</a>
-</h4>
-
-<div>
-<p>
-Since the <tt>Use</tt> objects are deprived of the direct (back)pointer to
-their <tt>User</tt> objects, there must be a fast and exact method to
-recover it. This is accomplished by the following scheme:</p>
-
-A bit-encoding in the 2 LSBits (least significant bits) of the <tt>Use::Prev</tt> allows to find the
-start of the <tt>User</tt> object:
-<ul>
-<li><tt>00</tt> &mdash;&gt; binary digit 0</li>
-<li><tt>01</tt> &mdash;&gt; binary digit 1</li>
-<li><tt>10</tt> &mdash;&gt; stop and calculate (<tt>s</tt>)</li>
-<li><tt>11</tt> &mdash;&gt; full stop (<tt>S</tt>)</li>
-</ul>
-<p>
-Given a <tt>Use*</tt>, all we have to do is to walk till we get
-a stop and we either have a <tt>User</tt> immediately behind or
-we have to walk to the next stop picking up digits
-and calculating the offset:</p>
-<pre>
-.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.----------------
-| 1 | s | 1 | 0 | 1 | 0 | s | 1 | 1 | 0 | s | 1 | 1 | s | 1 | S | User (or User*)
-'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'----------------
-    |+15                |+10            |+6         |+3     |+1
-    |                   |               |           |       |__>
-    |                   |               |           |__________>
-    |                   |               |______________________>
-    |                   |______________________________________>
-    |__________________________________________________________>
-</pre>
-<p>
-Only the significant number of bits need to be stored between the
-stops, so that the <i>worst case is 20 memory accesses</i> when there are
-1000 <tt>Use</tt> objects associated with a <tt>User</tt>.</p>
-
-</div>
-
-<!-- ______________________________________________________________________ -->
-<h4>
-  <a name="ReferenceImpl">Reference implementation</a>
-</h4>
-
-<div>
-<p>
-The following literate Haskell fragment demonstrates the concept:</p>
-
-<div class="doc_code">
-<pre>
-> import Test.QuickCheck
-> 
-> digits :: Int -> [Char] -> [Char]
-> digits 0 acc = '0' : acc
-> digits 1 acc = '1' : acc
-> digits n acc = digits (n `div` 2) $ digits (n `mod` 2) acc
-> 
-> dist :: Int -> [Char] -> [Char]
-> dist 0 [] = ['S']
-> dist 0 acc = acc
-> dist 1 acc = let r = dist 0 acc in 's' : digits (length r) r
-> dist n acc = dist (n - 1) $ dist 1 acc
-> 
-> takeLast n ss = reverse $ take n $ reverse ss
-> 
-> test = takeLast 40 $ dist 20 []
-> 
-</pre>
-</div>
-<p>
-Printing &lt;test&gt; gives: <tt>"1s100000s11010s10100s1111s1010s110s11s1S"</tt></p>
-<p>
-The reverse algorithm computes the length of the string just by examining
-a certain prefix:</p>
-
-<div class="doc_code">
-<pre>
-> pref :: [Char] -> Int
-> pref "S" = 1
-> pref ('s':'1':rest) = decode 2 1 rest
-> pref (_:rest) = 1 + pref rest
-> 
-> decode walk acc ('0':rest) = decode (walk + 1) (acc * 2) rest
-> decode walk acc ('1':rest) = decode (walk + 1) (acc * 2 + 1) rest
-> decode walk acc _ = walk + acc
-> 
-</pre>
-</div>
-<p>
-Now, as expected, printing &lt;pref test&gt; gives <tt>40</tt>.</p>
-<p>
-We can <i>quickCheck</i> this with following property:</p>
-
-<div class="doc_code">
-<pre>
-> testcase = dist 2000 []
-> testcaseLength = length testcase
-> 
-> identityProp n = n > 0 && n <= testcaseLength ==> length arr == pref arr
->     where arr = takeLast n testcase
-> 
-</pre>
-</div>
-<p>
-As expected &lt;quickCheck identityProp&gt; gives:</p>
-
-<pre>
-*Main> quickCheck identityProp
-OK, passed 100 tests.
-</pre>
-<p>
-Let's be a bit more exhaustive:</p>
-
-<div class="doc_code">
-<pre>
-> 
-> deepCheck p = check (defaultConfig { configMaxTest = 500 }) p
-> 
-</pre>
-</div>
-<p>
-And here is the result of &lt;deepCheck identityProp&gt;:</p>
-
-<pre>
-*Main> deepCheck identityProp
-OK, passed 500 tests.
-</pre>
-
-</div>
-
-<!-- ______________________________________________________________________ -->
-<h4>
-  <a name="Tagging">Tagging considerations</a>
-</h4>
-
-<div>
-
-<p>
-To maintain the invariant that the 2 LSBits of each <tt>Use**</tt> in <tt>Use</tt>
-never change after being set up, setters of <tt>Use::Prev</tt> must re-tag the
-new <tt>Use**</tt> on every modification. Accordingly getters must strip the
-tag bits.</p>
-<p>
-For layout b) instead of the <tt>User</tt> we find a pointer (<tt>User*</tt> with LSBit set).
-Following this pointer brings us to the <tt>User</tt>. A portable trick ensures
-that the first bytes of <tt>User</tt> (if interpreted as a pointer) never has
-the LSBit set. (Portability is relying on the fact that all known compilers place the
-<tt>vptr</tt> in the first word of the instances.)</p>
-
-</div>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="coreclasses">The Core LLVM Class Hierarchy Reference </a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-<p><tt>#include "<a href="/doxygen/Type_8h-source.html">llvm/Type.h</a>"</tt>
-<br>doxygen info: <a href="/doxygen/classllvm_1_1Type.html">Type Class</a></p>
-
-<p>The Core LLVM classes are the primary means of representing the program
-being inspected or transformed.  The core LLVM classes are defined in
-header files in the <tt>include/llvm/</tt> directory, and implemented in
-the <tt>lib/VMCore</tt> directory.</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="Type">The <tt>Type</tt> class and Derived Types</a>
-</h3>
-
-<div>
-
-  <p><tt>Type</tt> is a superclass of all type classes. Every <tt>Value</tt> has
-  a <tt>Type</tt>. <tt>Type</tt> cannot be instantiated directly but only
-  through its subclasses. Certain primitive types (<tt>VoidType</tt>,
-  <tt>LabelType</tt>, <tt>FloatType</tt> and <tt>DoubleType</tt>) have hidden 
-  subclasses. They are hidden because they offer no useful functionality beyond
-  what the <tt>Type</tt> class offers except to distinguish themselves from 
-  other subclasses of <tt>Type</tt>.</p>
-  <p>All other types are subclasses of <tt>DerivedType</tt>.  Types can be 
-  named, but this is not a requirement. There exists exactly 
-  one instance of a given shape at any one time.  This allows type equality to
-  be performed with address equality of the Type Instance. That is, given two 
-  <tt>Type*</tt> values, the types are identical if the pointers are identical.
-  </p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="m_Type">Important Public Methods</a>
-</h4>
-
-<div>
-
-<ul>
-  <li><tt>bool isIntegerTy() const</tt>: Returns true for any integer type.</li>
-
-  <li><tt>bool isFloatingPointTy()</tt>: Return true if this is one of the five
-  floating point types.</li>
-
-  <li><tt>bool isSized()</tt>: Return true if the type has known size. Things
-  that don't have a size are abstract types, labels and void.</li>
-
-</ul>
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="derivedtypes">Important Derived Types</a>
-</h4>
-<div>
-<dl>
-  <dt><tt>IntegerType</tt></dt>
-  <dd>Subclass of DerivedType that represents integer types of any bit width. 
-  Any bit width between <tt>IntegerType::MIN_INT_BITS</tt> (1) and 
-  <tt>IntegerType::MAX_INT_BITS</tt> (~8 million) can be represented.
-  <ul>
-    <li><tt>static const IntegerType* get(unsigned NumBits)</tt>: get an integer
-    type of a specific bit width.</li>
-    <li><tt>unsigned getBitWidth() const</tt>: Get the bit width of an integer
-    type.</li>
-  </ul>
-  </dd>
-  <dt><tt>SequentialType</tt></dt>
-  <dd>This is subclassed by ArrayType, PointerType and VectorType.
-    <ul>
-      <li><tt>const Type * getElementType() const</tt>: Returns the type of each
-      of the elements in the sequential type. </li>
-    </ul>
-  </dd>
-  <dt><tt>ArrayType</tt></dt>
-  <dd>This is a subclass of SequentialType and defines the interface for array 
-  types.
-    <ul>
-      <li><tt>unsigned getNumElements() const</tt>: Returns the number of 
-      elements in the array. </li>
-    </ul>
-  </dd>
-  <dt><tt>PointerType</tt></dt>
-  <dd>Subclass of SequentialType for pointer types.</dd>
-  <dt><tt>VectorType</tt></dt>
-  <dd>Subclass of SequentialType for vector types. A 
-  vector type is similar to an ArrayType but is distinguished because it is 
-  a first class type whereas ArrayType is not. Vector types are used for 
-  vector operations and are usually small vectors of of an integer or floating 
-  point type.</dd>
-  <dt><tt>StructType</tt></dt>
-  <dd>Subclass of DerivedTypes for struct types.</dd>
-  <dt><tt><a name="FunctionType">FunctionType</a></tt></dt>
-  <dd>Subclass of DerivedTypes for function types.
-    <ul>
-      <li><tt>bool isVarArg() const</tt>: Returns true if it's a vararg
-      function</li>
-      <li><tt> const Type * getReturnType() const</tt>: Returns the
-      return type of the function.</li>
-      <li><tt>const Type * getParamType (unsigned i)</tt>: Returns
-      the type of the ith parameter.</li>
-      <li><tt> const unsigned getNumParams() const</tt>: Returns the
-      number of formal parameters.</li>
-    </ul>
-  </dd>
-</dl>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="Module">The <tt>Module</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "<a
-href="/doxygen/Module_8h-source.html">llvm/Module.h</a>"</tt><br> doxygen info:
-<a href="/doxygen/classllvm_1_1Module.html">Module Class</a></p>
-
-<p>The <tt>Module</tt> class represents the top level structure present in LLVM
-programs.  An LLVM module is effectively either a translation unit of the
-original program or a combination of several translation units merged by the
-linker.  The <tt>Module</tt> class keeps track of a list of <a
-href="#Function"><tt>Function</tt></a>s, a list of <a
-href="#GlobalVariable"><tt>GlobalVariable</tt></a>s, and a <a
-href="#SymbolTable"><tt>SymbolTable</tt></a>.  Additionally, it contains a few
-helpful member functions that try to make common operations easy.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="m_Module">Important Public Members of the <tt>Module</tt> class</a>
-</h4>
-
-<div>
-
-<ul>
-  <li><tt>Module::Module(std::string name = "")</tt>
-
-  <p>Constructing a <a href="#Module">Module</a> is easy. You can optionally
-provide a name for it (probably based on the name of the translation unit).</p>
-  </li>
-
-  <li><tt>Module::iterator</tt> - Typedef for function list iterator<br>
-    <tt>Module::const_iterator</tt> - Typedef for const_iterator.<br>
-
-    <tt>begin()</tt>, <tt>end()</tt>
-    <tt>size()</tt>, <tt>empty()</tt>
-
-    <p>These are forwarding methods that make it easy to access the contents of
-    a <tt>Module</tt> object's <a href="#Function"><tt>Function</tt></a>
-    list.</p></li>
-
-  <li><tt>Module::FunctionListType &amp;getFunctionList()</tt>
-
-    <p> Returns the list of <a href="#Function"><tt>Function</tt></a>s.  This is
-    necessary to use when you need to update the list or perform a complex
-    action that doesn't have a forwarding method.</p>
-
-    <p><!--  Global Variable --></p></li> 
-</ul>
-
-<hr>
-
-<ul>
-  <li><tt>Module::global_iterator</tt> - Typedef for global variable list iterator<br>
-
-    <tt>Module::const_global_iterator</tt> - Typedef for const_iterator.<br>
-
-    <tt>global_begin()</tt>, <tt>global_end()</tt>
-    <tt>global_size()</tt>, <tt>global_empty()</tt>
-
-    <p> These are forwarding methods that make it easy to access the contents of
-    a <tt>Module</tt> object's <a
-    href="#GlobalVariable"><tt>GlobalVariable</tt></a> list.</p></li>
-
-  <li><tt>Module::GlobalListType &amp;getGlobalList()</tt>
-
-    <p>Returns the list of <a
-    href="#GlobalVariable"><tt>GlobalVariable</tt></a>s.  This is necessary to
-    use when you need to update the list or perform a complex action that
-    doesn't have a forwarding method.</p>
-
-    <p><!--  Symbol table stuff --> </p></li>
-</ul>
-
-<hr>
-
-<ul>
-  <li><tt><a href="#SymbolTable">SymbolTable</a> *getSymbolTable()</tt>
-
-    <p>Return a reference to the <a href="#SymbolTable"><tt>SymbolTable</tt></a>
-    for this <tt>Module</tt>.</p>
-
-    <p><!--  Convenience methods --></p></li>
-</ul>
-
-<hr>
-
-<ul>
-
-  <li><tt><a href="#Function">Function</a> *getFunction(StringRef Name) const
-    </tt>
-
-    <p>Look up the specified function in the <tt>Module</tt> <a
-    href="#SymbolTable"><tt>SymbolTable</tt></a>. If it does not exist, return
-    <tt>null</tt>.</p></li>
-
-  <li><tt><a href="#Function">Function</a> *getOrInsertFunction(const
-  std::string &amp;Name, const <a href="#FunctionType">FunctionType</a> *T)</tt>
-
-    <p>Look up the specified function in the <tt>Module</tt> <a
-    href="#SymbolTable"><tt>SymbolTable</tt></a>. If it does not exist, add an
-    external declaration for the function and return it.</p></li>
-
-  <li><tt>std::string getTypeName(const <a href="#Type">Type</a> *Ty)</tt>
-
-    <p>If there is at least one entry in the <a
-    href="#SymbolTable"><tt>SymbolTable</tt></a> for the specified <a
-    href="#Type"><tt>Type</tt></a>, return it.  Otherwise return the empty
-    string.</p></li>
-
-  <li><tt>bool addTypeName(const std::string &amp;Name, const <a
-  href="#Type">Type</a> *Ty)</tt>
-
-    <p>Insert an entry in the <a href="#SymbolTable"><tt>SymbolTable</tt></a>
-    mapping <tt>Name</tt> to <tt>Ty</tt>. If there is already an entry for this
-    name, true is returned and the <a
-    href="#SymbolTable"><tt>SymbolTable</tt></a> is not modified.</p></li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="Value">The <tt>Value</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "<a href="/doxygen/Value_8h-source.html">llvm/Value.h</a>"</tt>
-<br> 
-doxygen info: <a href="/doxygen/classllvm_1_1Value.html">Value Class</a></p>
-
-<p>The <tt>Value</tt> class is the most important class in the LLVM Source
-base.  It represents a typed value that may be used (among other things) as an
-operand to an instruction.  There are many different types of <tt>Value</tt>s,
-such as <a href="#Constant"><tt>Constant</tt></a>s,<a
-href="#Argument"><tt>Argument</tt></a>s. Even <a
-href="#Instruction"><tt>Instruction</tt></a>s and <a
-href="#Function"><tt>Function</tt></a>s are <tt>Value</tt>s.</p>
-
-<p>A particular <tt>Value</tt> may be used many times in the LLVM representation
-for a program.  For example, an incoming argument to a function (represented
-with an instance of the <a href="#Argument">Argument</a> class) is "used" by
-every instruction in the function that references the argument.  To keep track
-of this relationship, the <tt>Value</tt> class keeps a list of all of the <a
-href="#User"><tt>User</tt></a>s that is using it (the <a
-href="#User"><tt>User</tt></a> class is a base class for all nodes in the LLVM
-graph that can refer to <tt>Value</tt>s).  This use list is how LLVM represents
-def-use information in the program, and is accessible through the <tt>use_</tt>*
-methods, shown below.</p>
-
-<p>Because LLVM is a typed representation, every LLVM <tt>Value</tt> is typed,
-and this <a href="#Type">Type</a> is available through the <tt>getType()</tt>
-method. In addition, all LLVM values can be named.  The "name" of the
-<tt>Value</tt> is a symbolic string printed in the LLVM code:</p>
-
-<div class="doc_code">
-<pre>
-%<b>foo</b> = add i32 1, 2
-</pre>
-</div>
-
-<p><a name="nameWarning">The name of this instruction is "foo".</a> <b>NOTE</b>
-that the name of any value may be missing (an empty string), so names should
-<b>ONLY</b> be used for debugging (making the source code easier to read,
-debugging printouts), they should not be used to keep track of values or map
-between them.  For this purpose, use a <tt>std::map</tt> of pointers to the
-<tt>Value</tt> itself instead.</p>
-
-<p>One important aspect of LLVM is that there is no distinction between an SSA
-variable and the operation that produces it.  Because of this, any reference to
-the value produced by an instruction (or the value available as an incoming
-argument, for example) is represented as a direct pointer to the instance of
-the class that
-represents this value.  Although this may take some getting used to, it
-simplifies the representation and makes it easier to manipulate.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="m_Value">Important Public Members of the <tt>Value</tt> class</a>
-</h4>
-
-<div>
-
-<ul>
-  <li><tt>Value::use_iterator</tt> - Typedef for iterator over the
-use-list<br>
-    <tt>Value::const_use_iterator</tt> - Typedef for const_iterator over
-the use-list<br>
-    <tt>unsigned use_size()</tt> - Returns the number of users of the
-value.<br>
-    <tt>bool use_empty()</tt> - Returns true if there are no users.<br>
-    <tt>use_iterator use_begin()</tt> - Get an iterator to the start of
-the use-list.<br>
-    <tt>use_iterator use_end()</tt> - Get an iterator to the end of the
-use-list.<br>
-    <tt><a href="#User">User</a> *use_back()</tt> - Returns the last
-element in the list.
-    <p> These methods are the interface to access the def-use
-information in LLVM.  As with all other iterators in LLVM, the naming
-conventions follow the conventions defined by the <a href="#stl">STL</a>.</p>
-  </li>
-  <li><tt><a href="#Type">Type</a> *getType() const</tt>
-    <p>This method returns the Type of the Value.</p>
-  </li>
-  <li><tt>bool hasName() const</tt><br>
-    <tt>std::string getName() const</tt><br>
-    <tt>void setName(const std::string &amp;Name)</tt>
-    <p> This family of methods is used to access and assign a name to a <tt>Value</tt>,
-be aware of the <a href="#nameWarning">precaution above</a>.</p>
-  </li>
-  <li><tt>void replaceAllUsesWith(Value *V)</tt>
-
-    <p>This method traverses the use list of a <tt>Value</tt> changing all <a
-    href="#User"><tt>User</tt>s</a> of the current value to refer to
-    "<tt>V</tt>" instead.  For example, if you detect that an instruction always
-    produces a constant value (for example through constant folding), you can
-    replace all uses of the instruction with the constant like this:</p>
-
-<div class="doc_code">
-<pre>
-Inst-&gt;replaceAllUsesWith(ConstVal);
-</pre>
-</div>
-
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="User">The <tt>User</tt> class</a>
-</h3>
-
-<div>
-  
-<p>
-<tt>#include "<a href="/doxygen/User_8h-source.html">llvm/User.h</a>"</tt><br>
-doxygen info: <a href="/doxygen/classllvm_1_1User.html">User Class</a><br>
-Superclass: <a href="#Value"><tt>Value</tt></a></p>
-
-<p>The <tt>User</tt> class is the common base class of all LLVM nodes that may
-refer to <a href="#Value"><tt>Value</tt></a>s.  It exposes a list of "Operands"
-that are all of the <a href="#Value"><tt>Value</tt></a>s that the User is
-referring to.  The <tt>User</tt> class itself is a subclass of
-<tt>Value</tt>.</p>
-
-<p>The operands of a <tt>User</tt> point directly to the LLVM <a
-href="#Value"><tt>Value</tt></a> that it refers to.  Because LLVM uses Static
-Single Assignment (SSA) form, there can only be one definition referred to,
-allowing this direct connection.  This connection provides the use-def
-information in LLVM.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="m_User">Important Public Members of the <tt>User</tt> class</a>
-</h4>
-
-<div>
-
-<p>The <tt>User</tt> class exposes the operand list in two ways: through
-an index access interface and through an iterator based interface.</p>
-
-<ul>
-  <li><tt>Value *getOperand(unsigned i)</tt><br>
-    <tt>unsigned getNumOperands()</tt>
-    <p> These two methods expose the operands of the <tt>User</tt> in a
-convenient form for direct access.</p></li>
-
-  <li><tt>User::op_iterator</tt> - Typedef for iterator over the operand
-list<br>
-    <tt>op_iterator op_begin()</tt> - Get an iterator to the start of 
-the operand list.<br>
-    <tt>op_iterator op_end()</tt> - Get an iterator to the end of the
-operand list.
-    <p> Together, these methods make up the iterator based interface to
-the operands of a <tt>User</tt>.</p></li>
-</ul>
-
-</div>    
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="Instruction">The <tt>Instruction</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "</tt><tt><a
-href="/doxygen/Instruction_8h-source.html">llvm/Instruction.h</a>"</tt><br>
-doxygen info: <a href="/doxygen/classllvm_1_1Instruction.html">Instruction Class</a><br>
-Superclasses: <a href="#User"><tt>User</tt></a>, <a
-href="#Value"><tt>Value</tt></a></p>
-
-<p>The <tt>Instruction</tt> class is the common base class for all LLVM
-instructions.  It provides only a few methods, but is a very commonly used
-class.  The primary data tracked by the <tt>Instruction</tt> class itself is the
-opcode (instruction type) and the parent <a
-href="#BasicBlock"><tt>BasicBlock</tt></a> the <tt>Instruction</tt> is embedded
-into.  To represent a specific type of instruction, one of many subclasses of
-<tt>Instruction</tt> are used.</p>
-
-<p> Because the <tt>Instruction</tt> class subclasses the <a
-href="#User"><tt>User</tt></a> class, its operands can be accessed in the same
-way as for other <a href="#User"><tt>User</tt></a>s (with the
-<tt>getOperand()</tt>/<tt>getNumOperands()</tt> and
-<tt>op_begin()</tt>/<tt>op_end()</tt> methods).</p> <p> An important file for
-the <tt>Instruction</tt> class is the <tt>llvm/Instruction.def</tt> file. This
-file contains some meta-data about the various different types of instructions
-in LLVM.  It describes the enum values that are used as opcodes (for example
-<tt>Instruction::Add</tt> and <tt>Instruction::ICmp</tt>), as well as the
-concrete sub-classes of <tt>Instruction</tt> that implement the instruction (for
-example <tt><a href="#BinaryOperator">BinaryOperator</a></tt> and <tt><a
-href="#CmpInst">CmpInst</a></tt>).  Unfortunately, the use of macros in
-this file confuses doxygen, so these enum values don't show up correctly in the
-<a href="/doxygen/classllvm_1_1Instruction.html">doxygen output</a>.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="s_Instruction">
-    Important Subclasses of the <tt>Instruction</tt> class
-  </a>
-</h4>
-<div>
-  <ul>
-    <li><tt><a name="BinaryOperator">BinaryOperator</a></tt>
-    <p>This subclasses represents all two operand instructions whose operands
-    must be the same type, except for the comparison instructions.</p></li>
-    <li><tt><a name="CastInst">CastInst</a></tt>
-    <p>This subclass is the parent of the 12 casting instructions. It provides
-    common operations on cast instructions.</p>
-    <li><tt><a name="CmpInst">CmpInst</a></tt>
-    <p>This subclass respresents the two comparison instructions, 
-    <a href="LangRef.html#i_icmp">ICmpInst</a> (integer opreands), and
-    <a href="LangRef.html#i_fcmp">FCmpInst</a> (floating point operands).</p>
-    <li><tt><a name="TerminatorInst">TerminatorInst</a></tt>
-    <p>This subclass is the parent of all terminator instructions (those which
-    can terminate a block).</p>
-  </ul>
-  </div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="m_Instruction">
-    Important Public Members of the <tt>Instruction</tt> class
-  </a>
-</h4>
-
-<div>
-
-<ul>
-  <li><tt><a href="#BasicBlock">BasicBlock</a> *getParent()</tt>
-    <p>Returns the <a href="#BasicBlock"><tt>BasicBlock</tt></a> that
-this  <tt>Instruction</tt> is embedded into.</p></li>
-  <li><tt>bool mayWriteToMemory()</tt>
-    <p>Returns true if the instruction writes to memory, i.e. it is a
-      <tt>call</tt>,<tt>free</tt>,<tt>invoke</tt>, or <tt>store</tt>.</p></li>
-  <li><tt>unsigned getOpcode()</tt>
-    <p>Returns the opcode for the <tt>Instruction</tt>.</p></li>
-  <li><tt><a href="#Instruction">Instruction</a> *clone() const</tt>
-    <p>Returns another instance of the specified instruction, identical
-in all ways to the original except that the instruction has no parent
-(ie it's not embedded into a <a href="#BasicBlock"><tt>BasicBlock</tt></a>),
-and it has no name</p></li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="Constant">The <tt>Constant</tt> class and subclasses</a>
-</h3>
-
-<div>
-
-<p>Constant represents a base class for different types of constants. It
-is subclassed by ConstantInt, ConstantArray, etc. for representing 
-the various types of Constants.  <a href="#GlobalValue">GlobalValue</a> is also
-a subclass, which represents the address of a global variable or function.
-</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>Important Subclasses of Constant</h4>
-<div>
-<ul>
-  <li>ConstantInt : This subclass of Constant represents an integer constant of
-  any width.
-    <ul>
-      <li><tt>const APInt&amp; getValue() const</tt>: Returns the underlying
-      value of this constant, an APInt value.</li>
-      <li><tt>int64_t getSExtValue() const</tt>: Converts the underlying APInt
-      value to an int64_t via sign extension. If the value (not the bit width)
-      of the APInt is too large to fit in an int64_t, an assertion will result.
-      For this reason, use of this method is discouraged.</li>
-      <li><tt>uint64_t getZExtValue() const</tt>: Converts the underlying APInt
-      value to a uint64_t via zero extension. IF the value (not the bit width)
-      of the APInt is too large to fit in a uint64_t, an assertion will result.
-      For this reason, use of this method is discouraged.</li>
-      <li><tt>static ConstantInt* get(const APInt&amp; Val)</tt>: Returns the
-      ConstantInt object that represents the value provided by <tt>Val</tt>.
-      The type is implied as the IntegerType that corresponds to the bit width
-      of <tt>Val</tt>.</li>
-      <li><tt>static ConstantInt* get(const Type *Ty, uint64_t Val)</tt>: 
-      Returns the ConstantInt object that represents the value provided by 
-      <tt>Val</tt> for integer type <tt>Ty</tt>.</li>
-    </ul>
-  </li>
-  <li>ConstantFP : This class represents a floating point constant.
-    <ul>
-      <li><tt>double getValue() const</tt>: Returns the underlying value of 
-      this constant. </li>
-    </ul>
-  </li>
-  <li>ConstantArray : This represents a constant array.
-    <ul>
-      <li><tt>const std::vector&lt;Use&gt; &amp;getValues() const</tt>: Returns 
-      a vector of component constants that makeup this array. </li>
-    </ul>
-  </li>
-  <li>ConstantStruct : This represents a constant struct.
-    <ul>
-      <li><tt>const std::vector&lt;Use&gt; &amp;getValues() const</tt>: Returns 
-      a vector of component constants that makeup this array. </li>
-    </ul>
-  </li>
-  <li>GlobalValue : This represents either a global variable or a function. In 
-  either case, the value is a constant fixed address (after linking). 
-  </li>
-</ul>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="GlobalValue">The <tt>GlobalValue</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "<a
-href="/doxygen/GlobalValue_8h-source.html">llvm/GlobalValue.h</a>"</tt><br>
-doxygen info: <a href="/doxygen/classllvm_1_1GlobalValue.html">GlobalValue
-Class</a><br>
-Superclasses: <a href="#Constant"><tt>Constant</tt></a>, 
-<a href="#User"><tt>User</tt></a>, <a href="#Value"><tt>Value</tt></a></p>
-
-<p>Global values (<a href="#GlobalVariable"><tt>GlobalVariable</tt></a>s or <a
-href="#Function"><tt>Function</tt></a>s) are the only LLVM values that are
-visible in the bodies of all <a href="#Function"><tt>Function</tt></a>s.
-Because they are visible at global scope, they are also subject to linking with
-other globals defined in different translation units.  To control the linking
-process, <tt>GlobalValue</tt>s know their linkage rules. Specifically,
-<tt>GlobalValue</tt>s know whether they have internal or external linkage, as
-defined by the <tt>LinkageTypes</tt> enumeration.</p>
-
-<p>If a <tt>GlobalValue</tt> has internal linkage (equivalent to being
-<tt>static</tt> in C), it is not visible to code outside the current translation
-unit, and does not participate in linking.  If it has external linkage, it is
-visible to external code, and does participate in linking.  In addition to
-linkage information, <tt>GlobalValue</tt>s keep track of which <a
-href="#Module"><tt>Module</tt></a> they are currently part of.</p>
-
-<p>Because <tt>GlobalValue</tt>s are memory objects, they are always referred to
-by their <b>address</b>. As such, the <a href="#Type"><tt>Type</tt></a> of a
-global is always a pointer to its contents. It is important to remember this
-when using the <tt>GetElementPtrInst</tt> instruction because this pointer must
-be dereferenced first. For example, if you have a <tt>GlobalVariable</tt> (a
-subclass of <tt>GlobalValue)</tt> that is an array of 24 ints, type <tt>[24 x
-i32]</tt>, then the <tt>GlobalVariable</tt> is a pointer to that array. Although
-the address of the first element of this array and the value of the
-<tt>GlobalVariable</tt> are the same, they have different types. The
-<tt>GlobalVariable</tt>'s type is <tt>[24 x i32]</tt>. The first element's type
-is <tt>i32.</tt> Because of this, accessing a global value requires you to
-dereference the pointer with <tt>GetElementPtrInst</tt> first, then its elements
-can be accessed. This is explained in the <a href="LangRef.html#globalvars">LLVM
-Language Reference Manual</a>.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="m_GlobalValue">
-    Important Public Members of the <tt>GlobalValue</tt> class
-  </a>
-</h4>
-
-<div>
-
-<ul>
-  <li><tt>bool hasInternalLinkage() const</tt><br>
-    <tt>bool hasExternalLinkage() const</tt><br>
-    <tt>void setInternalLinkage(bool HasInternalLinkage)</tt>
-    <p> These methods manipulate the linkage characteristics of the <tt>GlobalValue</tt>.</p>
-    <p> </p>
-  </li>
-  <li><tt><a href="#Module">Module</a> *getParent()</tt>
-    <p> This returns the <a href="#Module"><tt>Module</tt></a> that the
-GlobalValue is currently embedded into.</p></li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="Function">The <tt>Function</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "<a
-href="/doxygen/Function_8h-source.html">llvm/Function.h</a>"</tt><br> doxygen
-info: <a href="/doxygen/classllvm_1_1Function.html">Function Class</a><br>
-Superclasses: <a href="#GlobalValue"><tt>GlobalValue</tt></a>, 
-<a href="#Constant"><tt>Constant</tt></a>, 
-<a href="#User"><tt>User</tt></a>, 
-<a href="#Value"><tt>Value</tt></a></p>
-
-<p>The <tt>Function</tt> class represents a single procedure in LLVM.  It is
-actually one of the more complex classes in the LLVM hierarchy because it must
-keep track of a large amount of data.  The <tt>Function</tt> class keeps track
-of a list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s, a list of formal 
-<a href="#Argument"><tt>Argument</tt></a>s, and a 
-<a href="#SymbolTable"><tt>SymbolTable</tt></a>.</p>
-
-<p>The list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s is the most
-commonly used part of <tt>Function</tt> objects.  The list imposes an implicit
-ordering of the blocks in the function, which indicate how the code will be
-laid out by the backend.  Additionally, the first <a
-href="#BasicBlock"><tt>BasicBlock</tt></a> is the implicit entry node for the
-<tt>Function</tt>.  It is not legal in LLVM to explicitly branch to this initial
-block.  There are no implicit exit nodes, and in fact there may be multiple exit
-nodes from a single <tt>Function</tt>.  If the <a
-href="#BasicBlock"><tt>BasicBlock</tt></a> list is empty, this indicates that
-the <tt>Function</tt> is actually a function declaration: the actual body of the
-function hasn't been linked in yet.</p>
-
-<p>In addition to a list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s, the
-<tt>Function</tt> class also keeps track of the list of formal <a
-href="#Argument"><tt>Argument</tt></a>s that the function receives.  This
-container manages the lifetime of the <a href="#Argument"><tt>Argument</tt></a>
-nodes, just like the <a href="#BasicBlock"><tt>BasicBlock</tt></a> list does for
-the <a href="#BasicBlock"><tt>BasicBlock</tt></a>s.</p>
-
-<p>The <a href="#SymbolTable"><tt>SymbolTable</tt></a> is a very rarely used
-LLVM feature that is only used when you have to look up a value by name.  Aside
-from that, the <a href="#SymbolTable"><tt>SymbolTable</tt></a> is used
-internally to make sure that there are not conflicts between the names of <a
-href="#Instruction"><tt>Instruction</tt></a>s, <a
-href="#BasicBlock"><tt>BasicBlock</tt></a>s, or <a
-href="#Argument"><tt>Argument</tt></a>s in the function body.</p>
-
-<p>Note that <tt>Function</tt> is a <a href="#GlobalValue">GlobalValue</a>
-and therefore also a <a href="#Constant">Constant</a>. The value of the function
-is its address (after linking) which is guaranteed to be constant.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="m_Function">
-    Important Public Members of the <tt>Function</tt> class
-  </a>
-</h4>
-
-<div>
-
-<ul>
-  <li><tt>Function(const </tt><tt><a href="#FunctionType">FunctionType</a>
-  *Ty, LinkageTypes Linkage, const std::string &amp;N = "", Module* Parent = 0)</tt>
-
-    <p>Constructor used when you need to create new <tt>Function</tt>s to add
-    the program.  The constructor must specify the type of the function to
-    create and what type of linkage the function should have. The <a 
-    href="#FunctionType"><tt>FunctionType</tt></a> argument
-    specifies the formal arguments and return value for the function. The same
-    <a href="#FunctionType"><tt>FunctionType</tt></a> value can be used to
-    create multiple functions. The <tt>Parent</tt> argument specifies the Module
-    in which the function is defined. If this argument is provided, the function
-    will automatically be inserted into that module's list of
-    functions.</p></li>
-
-  <li><tt>bool isDeclaration()</tt>
-
-    <p>Return whether or not the <tt>Function</tt> has a body defined.  If the
-    function is "external", it does not have a body, and thus must be resolved
-    by linking with a function defined in a different translation unit.</p></li>
-
-  <li><tt>Function::iterator</tt> - Typedef for basic block list iterator<br>
-    <tt>Function::const_iterator</tt> - Typedef for const_iterator.<br>
-
-    <tt>begin()</tt>, <tt>end()</tt>
-    <tt>size()</tt>, <tt>empty()</tt>
-
-    <p>These are forwarding methods that make it easy to access the contents of
-    a <tt>Function</tt> object's <a href="#BasicBlock"><tt>BasicBlock</tt></a>
-    list.</p></li>
-
-  <li><tt>Function::BasicBlockListType &amp;getBasicBlockList()</tt>
-
-    <p>Returns the list of <a href="#BasicBlock"><tt>BasicBlock</tt></a>s.  This
-    is necessary to use when you need to update the list or perform a complex
-    action that doesn't have a forwarding method.</p></li>
-
-  <li><tt>Function::arg_iterator</tt> - Typedef for the argument list
-iterator<br>
-    <tt>Function::const_arg_iterator</tt> - Typedef for const_iterator.<br>
-
-    <tt>arg_begin()</tt>, <tt>arg_end()</tt>
-    <tt>arg_size()</tt>, <tt>arg_empty()</tt>
-
-    <p>These are forwarding methods that make it easy to access the contents of
-    a <tt>Function</tt> object's <a href="#Argument"><tt>Argument</tt></a>
-    list.</p></li>
-
-  <li><tt>Function::ArgumentListType &amp;getArgumentList()</tt>
-
-    <p>Returns the list of <a href="#Argument"><tt>Argument</tt></a>s.  This is
-    necessary to use when you need to update the list or perform a complex
-    action that doesn't have a forwarding method.</p></li>
-
-  <li><tt><a href="#BasicBlock">BasicBlock</a> &amp;getEntryBlock()</tt>
-
-    <p>Returns the entry <a href="#BasicBlock"><tt>BasicBlock</tt></a> for the
-    function.  Because the entry block for the function is always the first
-    block, this returns the first block of the <tt>Function</tt>.</p></li>
-
-  <li><tt><a href="#Type">Type</a> *getReturnType()</tt><br>
-    <tt><a href="#FunctionType">FunctionType</a> *getFunctionType()</tt>
-
-    <p>This traverses the <a href="#Type"><tt>Type</tt></a> of the
-    <tt>Function</tt> and returns the return type of the function, or the <a
-    href="#FunctionType"><tt>FunctionType</tt></a> of the actual
-    function.</p></li>
-
-  <li><tt><a href="#SymbolTable">SymbolTable</a> *getSymbolTable()</tt>
-
-    <p> Return a pointer to the <a href="#SymbolTable"><tt>SymbolTable</tt></a>
-    for this <tt>Function</tt>.</p></li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="GlobalVariable">The <tt>GlobalVariable</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "<a
-href="/doxygen/GlobalVariable_8h-source.html">llvm/GlobalVariable.h</a>"</tt>
-<br>
-doxygen info: <a href="/doxygen/classllvm_1_1GlobalVariable.html">GlobalVariable
- Class</a><br>
-Superclasses: <a href="#GlobalValue"><tt>GlobalValue</tt></a>, 
-<a href="#Constant"><tt>Constant</tt></a>,
-<a href="#User"><tt>User</tt></a>,
-<a href="#Value"><tt>Value</tt></a></p>
-
-<p>Global variables are represented with the (surprise surprise)
-<tt>GlobalVariable</tt> class. Like functions, <tt>GlobalVariable</tt>s are also
-subclasses of <a href="#GlobalValue"><tt>GlobalValue</tt></a>, and as such are
-always referenced by their address (global values must live in memory, so their
-"name" refers to their constant address). See 
-<a href="#GlobalValue"><tt>GlobalValue</tt></a> for more on this.  Global 
-variables may have an initial value (which must be a 
-<a href="#Constant"><tt>Constant</tt></a>), and if they have an initializer, 
-they may be marked as "constant" themselves (indicating that their contents 
-never change at runtime).</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="m_GlobalVariable">
-    Important Public Members of the <tt>GlobalVariable</tt> class
-  </a>
-</h4>
-
-<div>
-
-<ul>
-  <li><tt>GlobalVariable(const </tt><tt><a href="#Type">Type</a> *Ty, bool
-  isConstant, LinkageTypes&amp; Linkage, <a href="#Constant">Constant</a>
-  *Initializer = 0, const std::string &amp;Name = "", Module* Parent = 0)</tt>
-
-    <p>Create a new global variable of the specified type. If
-    <tt>isConstant</tt> is true then the global variable will be marked as
-    unchanging for the program. The Linkage parameter specifies the type of
-    linkage (internal, external, weak, linkonce, appending) for the variable.
-    If the linkage is InternalLinkage, WeakAnyLinkage, WeakODRLinkage,
-    LinkOnceAnyLinkage or LinkOnceODRLinkage,&nbsp; then the resultant
-    global variable will have internal linkage.  AppendingLinkage concatenates
-    together all instances (in different translation units) of the variable
-    into a single variable but is only applicable to arrays.  &nbsp;See
-    the <a href="LangRef.html#modulestructure">LLVM Language Reference</a> for
-    further details on linkage types. Optionally an initializer, a name, and the
-    module to put the variable into may be specified for the global variable as
-    well.</p></li>
-
-  <li><tt>bool isConstant() const</tt>
-
-    <p>Returns true if this is a global variable that is known not to
-    be modified at runtime.</p></li>
-
-  <li><tt>bool hasInitializer()</tt>
-
-    <p>Returns true if this <tt>GlobalVariable</tt> has an intializer.</p></li>
-
-  <li><tt><a href="#Constant">Constant</a> *getInitializer()</tt>
-
-    <p>Returns the initial value for a <tt>GlobalVariable</tt>.  It is not legal
-    to call this method if there is no initializer.</p></li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="BasicBlock">The <tt>BasicBlock</tt> class</a>
-</h3>
-
-<div>
-
-<p><tt>#include "<a
-href="/doxygen/BasicBlock_8h-source.html">llvm/BasicBlock.h</a>"</tt><br>
-doxygen info: <a href="/doxygen/classllvm_1_1BasicBlock.html">BasicBlock
-Class</a><br>
-Superclass: <a href="#Value"><tt>Value</tt></a></p>
-
-<p>This class represents a single entry single exit section of the code,
-commonly known as a basic block by the compiler community.  The
-<tt>BasicBlock</tt> class maintains a list of <a
-href="#Instruction"><tt>Instruction</tt></a>s, which form the body of the block.
-Matching the language definition, the last element of this list of instructions
-is always a terminator instruction (a subclass of the <a
-href="#TerminatorInst"><tt>TerminatorInst</tt></a> class).</p>
-
-<p>In addition to tracking the list of instructions that make up the block, the
-<tt>BasicBlock</tt> class also keeps track of the <a
-href="#Function"><tt>Function</tt></a> that it is embedded into.</p>
-
-<p>Note that <tt>BasicBlock</tt>s themselves are <a
-href="#Value"><tt>Value</tt></a>s, because they are referenced by instructions
-like branches and can go in the switch tables. <tt>BasicBlock</tt>s have type
-<tt>label</tt>.</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="m_BasicBlock">
-    Important Public Members of the <tt>BasicBlock</tt> class
-  </a>
-</h4>
-
-<div>
-<ul>
-
-<li><tt>BasicBlock(const std::string &amp;Name = "", </tt><tt><a
- href="#Function">Function</a> *Parent = 0)</tt>
-
-<p>The <tt>BasicBlock</tt> constructor is used to create new basic blocks for
-insertion into a function.  The constructor optionally takes a name for the new
-block, and a <a href="#Function"><tt>Function</tt></a> to insert it into.  If
-the <tt>Parent</tt> parameter is specified, the new <tt>BasicBlock</tt> is
-automatically inserted at the end of the specified <a
-href="#Function"><tt>Function</tt></a>, if not specified, the BasicBlock must be
-manually inserted into the <a href="#Function"><tt>Function</tt></a>.</p></li>
-
-<li><tt>BasicBlock::iterator</tt> - Typedef for instruction list iterator<br>
-<tt>BasicBlock::const_iterator</tt> - Typedef for const_iterator.<br>
-<tt>begin()</tt>, <tt>end()</tt>, <tt>front()</tt>, <tt>back()</tt>,
-<tt>size()</tt>, <tt>empty()</tt>
-STL-style functions for accessing the instruction list.
-
-<p>These methods and typedefs are forwarding functions that have the same
-semantics as the standard library methods of the same names.  These methods
-expose the underlying instruction list of a basic block in a way that is easy to
-manipulate.  To get the full complement of container operations (including
-operations to update the list), you must use the <tt>getInstList()</tt>
-method.</p></li>
-
-<li><tt>BasicBlock::InstListType &amp;getInstList()</tt>
-
-<p>This method is used to get access to the underlying container that actually
-holds the Instructions.  This method must be used when there isn't a forwarding
-function in the <tt>BasicBlock</tt> class for the operation that you would like
-to perform.  Because there are no forwarding functions for "updating"
-operations, you need to use this if you want to update the contents of a
-<tt>BasicBlock</tt>.</p></li>
-
-<li><tt><a href="#Function">Function</a> *getParent()</tt>
-
-<p> Returns a pointer to <a href="#Function"><tt>Function</tt></a> the block is
-embedded into, or a null pointer if it is homeless.</p></li>
-
-<li><tt><a href="#TerminatorInst">TerminatorInst</a> *getTerminator()</tt>
-
-<p> Returns a pointer to the terminator instruction that appears at the end of
-the <tt>BasicBlock</tt>.  If there is no terminator instruction, or if the last
-instruction in the block is not a terminator, then a null pointer is
-returned.</p></li>
-
-</ul>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="Argument">The <tt>Argument</tt> class</a>
-</h3>
-
-<div>
-
-<p>This subclass of Value defines the interface for incoming formal
-arguments to a function. A Function maintains a list of its formal
-arguments. An argument has a pointer to the parent Function.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01 Strict"></a>
-
-  <a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-
-</body>
-</html>
diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst
new file mode 100644
index 0000000000..2b272de425
--- /dev/null
+++ b/docs/ProgrammersManual.rst
@@ -0,0 +1,3166 @@
+========================
+LLVM Programmer's Manual
+========================
+
+.. contents::
+   :local:
+
+.. warning::
+   This is a work in progress.
+
+.. sectionauthor:: Chris Lattner <sabre@nondot.org>,
+                   Dinakar Dhurjati <dhurjati@cs.uiuc.edu>,
+                   Gabor Greif <ggreif@gmail.com>,
+                   Joel Stanley <jstanley@cs.uiuc.edu>,
+                   Reid Spencer <rspencer@x10sys.com> and
+                   Owen Anderson <owen@apple.com>
+
+.. _introduction:
+
+Introduction
+============
+
+This document is meant to highlight some of the important classes and interfaces
+available in the LLVM source-base.  This manual is not intended to explain what
+LLVM is, how it works, and what LLVM code looks like.  It assumes that you know
+the basics of LLVM and are interested in writing transformations or otherwise
+analyzing or manipulating the code.
+
+This document should get you oriented so that you can find your way in the
+continuously growing source code that makes up the LLVM infrastructure.  Note
+that this manual is not intended to serve as a replacement for reading the
+source code, so if you think there should be a method in one of these classes to
+do something, but it's not listed, check the source.  Links to the `doxygen
+<http://llvm.org/doxygen/>`__ sources are provided to make this as easy as
+possible.
+
+The first section of this document describes general information that is useful
+to know when working in the LLVM infrastructure, and the second describes the
+Core LLVM classes.  In the future this manual will be extended with information
+describing how to use extension libraries, such as dominator information, CFG
+traversal routines, and useful utilities like the ``InstVisitor`` (`doxygen
+<http://llvm.org/doxygen/InstVisitor_8h-source.html>`__) template.
+
+.. _general:
+
+General Information
+===================
+
+This section contains general information that is useful if you are working in
+the LLVM source-base, but that isn't specific to any particular API.
+
+.. _stl:
+
+The C++ Standard Template Library
+---------------------------------
+
+LLVM makes heavy use of the C++ Standard Template Library (STL), perhaps much
+more than you are used to, or have seen before.  Because of this, you might want
+to do a little background reading in the techniques used and capabilities of the
+library.  There are many good pages that discuss the STL, and several books on
+the subject that you can get, so it will not be discussed in this document.
+
+Here are some useful links:
+
+#. `cppreference.com
+   <http://en.cppreference.com/w/>`_ - an excellent
+   reference for the STL and other parts of the standard C++ library.
+
+#. `C++ In a Nutshell <http://www.tempest-sw.com/cpp/>`_ - This is an O'Reilly
+   book in the making.  It has a decent Standard Library Reference that rivals
+   Dinkumware's, and is unfortunately no longer free since the book has been
+   published.
+
+#. `C++ Frequently Asked Questions <http://www.parashift.com/c++-faq-lite/>`_.
+
+#. `SGI's STL Programmer's Guide <http://www.sgi.com/tech/stl/>`_ - Contains a
+   useful `Introduction to the STL
+   <http://www.sgi.com/tech/stl/stl_introduction.html>`_.
+
+#. `Bjarne Stroustrup's C++ Page
+   <http://www.research.att.com/%7Ebs/C++.html>`_.
+
+#. `Bruce Eckel's Thinking in C++, 2nd ed. Volume 2 Revision 4.0
+   (even better, get the book)
+   <http://www.mindview.net/Books/TICPP/ThinkingInCPP2e.html>`_.
+
+You are also encouraged to take a look at the :ref:`LLVM Coding Standards
+<coding_standards>` guide which focuses on how to write maintainable code more
+than where to put your curly braces.
+
+.. _resources:
+
+Other useful references
+-----------------------
+
+#. `Using static and shared libraries across platforms
+   <http://www.fortran-2000.com/ArnaudRecipes/sharedlib.html>`_
+
+.. _apis:
+
+Important and useful LLVM APIs
+==============================
+
+Here we highlight some LLVM APIs that are generally useful and good to know
+about when writing transformations.
+
+.. _isa:
+
+The ``isa<>``, ``cast<>`` and ``dyn_cast<>`` templates
+------------------------------------------------------
+
+The LLVM source-base makes extensive use of a custom form of RTTI.  These
+templates have many similarities to the C++ ``dynamic_cast<>`` operator, but
+they don't have some drawbacks (primarily stemming from the fact that
+``dynamic_cast<>`` only works on classes that have a v-table).  Because they are
+used so often, you must know what they do and how they work.  All of these
+templates are defined in the ``llvm/Support/Casting.h`` (`doxygen
+<http://llvm.org/doxygen/Casting_8h-source.html>`__) file (note that you very
+rarely have to include this file directly).
+
+``isa<>``:
+  The ``isa<>`` operator works exactly like the Java "``instanceof``" operator.
+  It returns true or false depending on whether a reference or pointer points to
+  an instance of the specified class.  This can be very useful for constraint
+  checking of various sorts (example below).
+
+``cast<>``:
+  The ``cast<>`` operator is a "checked cast" operation.  It converts a pointer
+  or reference from a base class to a derived class, causing an assertion
+  failure if it is not really an instance of the right type.  This should be
+  used in cases where you have some information that makes you believe that
+  something is of the right type.  An example of the ``isa<>`` and ``cast<>``
+  template is:
+
+  .. code-block:: c++
+
+    static bool isLoopInvariant(const Value *V, const Loop *L) {
+      if (isa<Constant>(V) || isa<Argument>(V) || isa<GlobalValue>(V))
+        return true;
+
+      // Otherwise, it must be an instruction...
+      return !L->contains(cast<Instruction>(V)->getParent());
+    }
+
+  Note that you should **not** use an ``isa<>`` test followed by a ``cast<>``,
+  for that use the ``dyn_cast<>`` operator.
+
+``dyn_cast<>``:
+  The ``dyn_cast<>`` operator is a "checking cast" operation.  It checks to see
+  if the operand is of the specified type, and if so, returns a pointer to it
+  (this operator does not work with references).  If the operand is not of the
+  correct type, a null pointer is returned.  Thus, this works very much like
+  the ``dynamic_cast<>`` operator in C++, and should be used in the same
+  circumstances.  Typically, the ``dyn_cast<>`` operator is used in an ``if``
+  statement or some other flow control statement like this:
+
+  .. code-block:: c++
+
+    if (AllocationInst *AI = dyn_cast<AllocationInst>(Val)) {
+      // ...
+    }
+
+  This form of the ``if`` statement effectively combines together a call to
+  ``isa<>`` and a call to ``cast<>`` into one statement, which is very
+  convenient.
+
+  Note that the ``dyn_cast<>`` operator, like C++'s ``dynamic_cast<>`` or Java's
+  ``instanceof`` operator, can be abused.  In particular, you should not use big
+  chained ``if/then/else`` blocks to check for lots of different variants of
+  classes.  If you find yourself wanting to do this, it is much cleaner and more
+  efficient to use the ``InstVisitor`` class to dispatch over the instruction
+  type directly.
+
+``cast_or_null<>``:
+  The ``cast_or_null<>`` operator works just like the ``cast<>`` operator,
+  except that it allows for a null pointer as an argument (which it then
+  propagates).  This can sometimes be useful, allowing you to combine several
+  null checks into one.
+
+``dyn_cast_or_null<>``:
+  The ``dyn_cast_or_null<>`` operator works just like the ``dyn_cast<>``
+  operator, except that it allows for a null pointer as an argument (which it
+  then propagates).  This can sometimes be useful, allowing you to combine
+  several null checks into one.
+
+These five templates can be used with any classes, whether they have a v-table
+or not.  If you want to add support for these templates, see the document
+:ref:`How to set up LLVM-style RTTI for your class hierarchy
+<how-to-set-up-llvm-style-rtti>`
+
+.. _string_apis:
+
+Passing strings (the ``StringRef`` and ``Twine`` classes)
+---------------------------------------------------------
+
+Although LLVM generally does not do much string manipulation, we do have several
+important APIs which take strings.  Two important examples are the Value class
+-- which has names for instructions, functions, etc. -- and the ``StringMap``
+class which is used extensively in LLVM and Clang.
+
+These are generic classes, and they need to be able to accept strings which may
+have embedded null characters.  Therefore, they cannot simply take a ``const
+char *``, and taking a ``const std::string&`` requires clients to perform a heap
+allocation which is usually unnecessary.  Instead, many LLVM APIs use a
+``StringRef`` or a ``const Twine&`` for passing strings efficiently.
+
+.. _StringRef:
+
+The ``StringRef`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``StringRef`` data type represents a reference to a constant string (a
+character array and a length) and supports the common operations available on
+``std::string``, but does not require heap allocation.
+
+It can be implicitly constructed using a C style null-terminated string, an
+``std::string``, or explicitly with a character pointer and length.  For
+example, the ``StringRef`` find function is declared as:
+
+.. code-block:: c++
+
+  iterator find(StringRef Key);
+
+and clients can call it using any one of:
+
+.. code-block:: c++
+
+  Map.find("foo");                 // Lookup "foo"
+  Map.find(std::string("bar"));    // Lookup "bar"
+  Map.find(StringRef("\0baz", 4)); // Lookup "\0baz"
+
+Similarly, APIs which need to return a string may return a ``StringRef``
+instance, which can be used directly or converted to an ``std::string`` using
+the ``str`` member function.  See ``llvm/ADT/StringRef.h`` (`doxygen
+<http://llvm.org/doxygen/classllvm_1_1StringRef_8h-source.html>`__) for more
+information.
+
+You should rarely use the ``StringRef`` class directly, because it contains
+pointers to external memory it is not generally safe to store an instance of the
+class (unless you know that the external storage will not be freed).
+``StringRef`` is small and pervasive enough in LLVM that it should always be
+passed by value.
+
+The ``Twine`` class
+^^^^^^^^^^^^^^^^^^^
+
+The ``Twine`` (`doxygen <http://llvm.org/doxygen/classllvm_1_1Twine.html>`__)
+class is an efficient way for APIs to accept concatenated strings.  For example,
+a common LLVM paradigm is to name one instruction based on the name of another
+instruction with a suffix, for example:
+
+.. code-block:: c++
+
+    New = CmpInst::Create(..., SO->getName() + ".cmp");
+
+The ``Twine`` class is effectively a lightweight `rope
+<http://en.wikipedia.org/wiki/Rope_(computer_science)>`_ which points to
+temporary (stack allocated) objects.  Twines can be implicitly constructed as
+the result of the plus operator applied to strings (i.e., a C strings, an
+``std::string``, or a ``StringRef``).  The twine delays the actual concatenation
+of strings until it is actually required, at which point it can be efficiently
+rendered directly into a character array.  This avoids unnecessary heap
+allocation involved in constructing the temporary results of string
+concatenation.  See ``llvm/ADT/Twine.h`` (`doxygen
+<http://llvm.org/doxygen/Twine_8h_source.html>`__) and :ref:`here <dss_twine>`
+for more information.
+
+As with a ``StringRef``, ``Twine`` objects point to external memory and should
+almost never be stored or mentioned directly.  They are intended solely for use
+when defining a function which should be able to efficiently accept concatenated
+strings.
+
+.. _DEBUG:
+
+The ``DEBUG()`` macro and ``-debug`` option
+-------------------------------------------
+
+Often when working on your pass you will put a bunch of debugging printouts and
+other code into your pass.  After you get it working, you want to remove it, but
+you may need it again in the future (to work out new bugs that you run across).
+
+Naturally, because of this, you don't want to delete the debug printouts, but
+you don't want them to always be noisy.  A standard compromise is to comment
+them out, allowing you to enable them if you need them in the future.
+
+The ``llvm/Support/Debug.h`` (`doxygen
+<http://llvm.org/doxygen/Debug_8h-source.html>`__) file provides a macro named
+``DEBUG()`` that is a much nicer solution to this problem.  Basically, you can
+put arbitrary code into the argument of the ``DEBUG`` macro, and it is only
+executed if '``opt``' (or any other tool) is run with the '``-debug``' command
+line argument:
+
+.. code-block:: c++
+
+  DEBUG(errs() << "I am here!\n");
+
+Then you can run your pass like this:
+
+.. code-block:: none
+
+  $ opt < a.bc > /dev/null -mypass
+  <no output>
+  $ opt < a.bc > /dev/null -mypass -debug
+  I am here!
+
+Using the ``DEBUG()`` macro instead of a home-brewed solution allows you to not
+have to create "yet another" command line option for the debug output for your
+pass.  Note that ``DEBUG()`` macros are disabled for optimized builds, so they
+do not cause a performance impact at all (for the same reason, they should also
+not contain side-effects!).
+
+One additional nice thing about the ``DEBUG()`` macro is that you can enable or
+disable it directly in gdb.  Just use "``set DebugFlag=0``" or "``set
+DebugFlag=1``" from the gdb if the program is running.  If the program hasn't
+been started yet, you can always just run it with ``-debug``.
+
+.. _DEBUG_TYPE:
+
+Fine grained debug info with ``DEBUG_TYPE`` and the ``-debug-only`` option
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes you may find yourself in a situation where enabling ``-debug`` just
+turns on **too much** information (such as when working on the code generator).
+If you want to enable debug information with more fine-grained control, you
+define the ``DEBUG_TYPE`` macro and the ``-debug`` only option as follows:
+
+.. code-block:: c++
+
+  #undef  DEBUG_TYPE
+  DEBUG(errs() << "No debug type\n");
+  #define DEBUG_TYPE "foo"
+  DEBUG(errs() << "'foo' debug type\n");
+  #undef  DEBUG_TYPE
+  #define DEBUG_TYPE "bar"
+  DEBUG(errs() << "'bar' debug type\n"));
+  #undef  DEBUG_TYPE
+  #define DEBUG_TYPE ""
+  DEBUG(errs() << "No debug type (2)\n");
+
+Then you can run your pass like this:
+
+.. code-block:: none
+
+  $ opt < a.bc > /dev/null -mypass
+  <no output>
+  $ opt < a.bc > /dev/null -mypass -debug
+  No debug type
+  'foo' debug type
+  'bar' debug type
+  No debug type (2)
+  $ opt < a.bc > /dev/null -mypass -debug-only=foo
+  'foo' debug type
+  $ opt < a.bc > /dev/null -mypass -debug-only=bar
+  'bar' debug type
+
+Of course, in practice, you should only set ``DEBUG_TYPE`` at the top of a file,
+to specify the debug type for the entire module (if you do this before you
+``#include "llvm/Support/Debug.h"``, you don't have to insert the ugly
+``#undef``'s).  Also, you should use names more meaningful than "foo" and "bar",
+because there is no system in place to ensure that names do not conflict.  If
+two different modules use the same string, they will all be turned on when the
+name is specified.  This allows, for example, all debug information for
+instruction scheduling to be enabled with ``-debug-type=InstrSched``, even if
+the source lives in multiple files.
+
+The ``DEBUG_WITH_TYPE`` macro is also available for situations where you would
+like to set ``DEBUG_TYPE``, but only for one specific ``DEBUG`` statement.  It
+takes an additional first parameter, which is the type to use.  For example, the
+preceding example could be written as:
+
+.. code-block:: c++
+
+  DEBUG_WITH_TYPE("", errs() << "No debug type\n");
+  DEBUG_WITH_TYPE("foo", errs() << "'foo' debug type\n");
+  DEBUG_WITH_TYPE("bar", errs() << "'bar' debug type\n"));
+  DEBUG_WITH_TYPE("", errs() << "No debug type (2)\n");
+
+.. _Statistic:
+
+The ``Statistic`` class & ``-stats`` option
+-------------------------------------------
+
+The ``llvm/ADT/Statistic.h`` (`doxygen
+<http://llvm.org/doxygen/Statistic_8h-source.html>`__) file provides a class
+named ``Statistic`` that is used as a unified way to keep track of what the LLVM
+compiler is doing and how effective various optimizations are.  It is useful to
+see what optimizations are contributing to making a particular program run
+faster.
+
+Often you may run your pass on some big program, and you're interested to see
+how many times it makes a certain transformation.  Although you can do this with
+hand inspection, or some ad-hoc method, this is a real pain and not very useful
+for big programs.  Using the ``Statistic`` class makes it very easy to keep
+track of this information, and the calculated information is presented in a
+uniform manner with the rest of the passes being executed.
+
+There are many examples of ``Statistic`` uses, but the basics of using it are as
+follows:
+
+#. Define your statistic like this:
+
+  .. code-block:: c++
+
+    #define DEBUG_TYPE "mypassname"   // This goes before any #includes.
+    STATISTIC(NumXForms, "The # of times I did stuff");
+
+  The ``STATISTIC`` macro defines a static variable, whose name is specified by
+  the first argument.  The pass name is taken from the ``DEBUG_TYPE`` macro, and
+  the description is taken from the second argument.  The variable defined
+  ("NumXForms" in this case) acts like an unsigned integer.
+
+#. Whenever you make a transformation, bump the counter:
+
+  .. code-block:: c++
+
+    ++NumXForms;   // I did stuff!
+
+That's all you have to do.  To get '``opt``' to print out the statistics
+gathered, use the '``-stats``' option:
+
+.. code-block:: none
+
+  $ opt -stats -mypassname < program.bc > /dev/null
+  ... statistics output ...
+
+When running ``opt`` on a C file from the SPEC benchmark suite, it gives a
+report that looks like this:
+
+.. code-block:: none
+
+   7646 bitcodewriter   - Number of normal instructions
+    725 bitcodewriter   - Number of oversized instructions
+ 129996 bitcodewriter   - Number of bitcode bytes written
+   2817 raise           - Number of insts DCEd or constprop'd
+   3213 raise           - Number of cast-of-self removed
+   5046 raise           - Number of expression trees converted
+     75 raise           - Number of other getelementptr's formed
+    138 raise           - Number of load/store peepholes
+     42 deadtypeelim    - Number of unused typenames removed from symtab
+    392 funcresolve     - Number of varargs functions resolved
+     27 globaldce       - Number of global variables removed
+      2 adce            - Number of basic blocks removed
+    134 cee             - Number of branches revectored
+     49 cee             - Number of setcc instruction eliminated
+    532 gcse            - Number of loads removed
+   2919 gcse            - Number of instructions removed
+     86 indvars         - Number of canonical indvars added
+     87 indvars         - Number of aux indvars removed
+     25 instcombine     - Number of dead inst eliminate
+    434 instcombine     - Number of insts combined
+    248 licm            - Number of load insts hoisted
+   1298 licm            - Number of insts hoisted to a loop pre-header
+      3 licm            - Number of insts hoisted to multiple loop preds (bad, no loop pre-header)
+     75 mem2reg         - Number of alloca's promoted
+   1444 cfgsimplify     - Number of blocks simplified
+
+Obviously, with so many optimizations, having a unified framework for this stuff
+is very nice.  Making your pass fit well into the framework makes it more
+maintainable and useful.
+
+.. _ViewGraph:
+
+Viewing graphs while debugging code
+-----------------------------------
+
+Several of the important data structures in LLVM are graphs: for example CFGs
+made out of LLVM :ref:`BasicBlocks <BasicBlock>`, CFGs made out of LLVM
+:ref:`MachineBasicBlocks <MachineBasicBlock>`, and :ref:`Instruction Selection
+DAGs <SelectionDAG>`.  In many cases, while debugging various parts of the
+compiler, it is nice to instantly visualize these graphs.
+
+LLVM provides several callbacks that are available in a debug build to do
+exactly that.  If you call the ``Function::viewCFG()`` method, for example, the
+current LLVM tool will pop up a window containing the CFG for the function where
+each basic block is a node in the graph, and each node contains the instructions
+in the block.  Similarly, there also exists ``Function::viewCFGOnly()`` (does
+not include the instructions), the ``MachineFunction::viewCFG()`` and
+``MachineFunction::viewCFGOnly()``, and the ``SelectionDAG::viewGraph()``
+methods.  Within GDB, for example, you can usually use something like ``call
+DAG.viewGraph()`` to pop up a window.  Alternatively, you can sprinkle calls to
+these functions in your code in places you want to debug.
+
+Getting this to work requires a small amount of configuration.  On Unix systems
+with X11, install the `graphviz <http://www.graphviz.org>`_ toolkit, and make
+sure 'dot' and 'gv' are in your path.  If you are running on Mac OS/X, download
+and install the Mac OS/X `Graphviz program
+<http://www.pixelglow.com/graphviz/>`_ and add
+``/Applications/Graphviz.app/Contents/MacOS/`` (or wherever you install it) to
+your path.  Once in your system and path are set up, rerun the LLVM configure
+script and rebuild LLVM to enable this functionality.
+
+``SelectionDAG`` has been extended to make it easier to locate *interesting*
+nodes in large complex graphs.  From gdb, if you ``call DAG.setGraphColor(node,
+"color")``, then the next ``call DAG.viewGraph()`` would highlight the node in
+the specified color (choices of colors can be found at `colors
+<http://www.graphviz.org/doc/info/colors.html>`_.) More complex node attributes
+can be provided with ``call DAG.setGraphAttrs(node, "attributes")`` (choices can
+be found at `Graph attributes <http://www.graphviz.org/doc/info/attrs.html>`_.)
+If you want to restart and clear all the current graph attributes, then you can
+``call DAG.clearGraphAttrs()``.
+
+Note that graph visualization features are compiled out of Release builds to
+reduce file size.  This means that you need a Debug+Asserts or Release+Asserts
+build to use these features.
+
+.. _datastructure:
+
+Picking the Right Data Structure for a Task
+===========================================
+
+LLVM has a plethora of data structures in the ``llvm/ADT/`` directory, and we
+commonly use STL data structures.  This section describes the trade-offs you
+should consider when you pick one.
+
+The first step is a choose your own adventure: do you want a sequential
+container, a set-like container, or a map-like container?  The most important
+thing when choosing a container is the algorithmic properties of how you plan to
+access the container.  Based on that, you should use:
+
+
+* a :ref:`map-like <ds_map>` container if you need efficient look-up of a
+  value based on another value.  Map-like containers also support efficient
+  queries for containment (whether a key is in the map).  Map-like containers
+  generally do not support efficient reverse mapping (values to keys).  If you
+  need that, use two maps.  Some map-like containers also support efficient
+  iteration through the keys in sorted order.  Map-like containers are the most
+  expensive sort, only use them if you need one of these capabilities.
+
+* a :ref:`set-like <ds_set>` container if you need to put a bunch of stuff into
+  a container that automatically eliminates duplicates.  Some set-like
+  containers support efficient iteration through the elements in sorted order.
+  Set-like containers are more expensive than sequential containers.
+
+* a :ref:`sequential <ds_sequential>` container provides the most efficient way
+  to add elements and keeps track of the order they are added to the collection.
+  They permit duplicates and support efficient iteration, but do not support
+  efficient look-up based on a key.
+
+* a :ref:`string <ds_string>` container is a specialized sequential container or
+  reference structure that is used for character or byte arrays.
+
+* a :ref:`bit <ds_bit>` container provides an efficient way to store and
+  perform set operations on sets of numeric id's, while automatically
+  eliminating duplicates.  Bit containers require a maximum of 1 bit for each
+  identifier you want to store.
+
+Once the proper category of container is determined, you can fine tune the
+memory use, constant factors, and cache behaviors of access by intelligently
+picking a member of the category.  Note that constant factors and cache behavior
+can be a big deal.  If you have a vector that usually only contains a few
+elements (but could contain many), for example, it's much better to use
+:ref:`SmallVector <dss_smallvector>` than :ref:`vector <dss_vector>`.  Doing so
+avoids (relatively) expensive malloc/free calls, which dwarf the cost of adding
+the elements to the container.
+
+.. _ds_sequential:
+
+Sequential Containers (std::vector, std::list, etc)
+---------------------------------------------------
+
+There are a variety of sequential containers available for you, based on your
+needs.  Pick the first in this section that will do what you want.
+
+.. _dss_arrayref:
+
+llvm/ADT/ArrayRef.h
+^^^^^^^^^^^^^^^^^^^
+
+The ``llvm::ArrayRef`` class is the preferred class to use in an interface that
+accepts a sequential list of elements in memory and just reads from them.  By
+taking an ``ArrayRef``, the API can be passed a fixed size array, an
+``std::vector``, an ``llvm::SmallVector`` and anything else that is contiguous
+in memory.
+
+.. _dss_fixedarrays:
+
+Fixed Size Arrays
+^^^^^^^^^^^^^^^^^
+
+Fixed size arrays are very simple and very fast.  They are good if you know
+exactly how many elements you have, or you have a (low) upper bound on how many
+you have.
+
+.. _dss_heaparrays:
+
+Heap Allocated Arrays
+^^^^^^^^^^^^^^^^^^^^^
+
+Heap allocated arrays (``new[]`` + ``delete[]``) are also simple.  They are good
+if the number of elements is variable, if you know how many elements you will
+need before the array is allocated, and if the array is usually large (if not,
+consider a :ref:`SmallVector <dss_smallvector>`).  The cost of a heap allocated
+array is the cost of the new/delete (aka malloc/free).  Also note that if you
+are allocating an array of a type with a constructor, the constructor and
+destructors will be run for every element in the array (re-sizable vectors only
+construct those elements actually used).
+
+.. _dss_tinyptrvector:
+
+llvm/ADT/TinyPtrVector.h
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+``TinyPtrVector<Type>`` is a highly specialized collection class that is
+optimized to avoid allocation in the case when a vector has zero or one
+elements.  It has two major restrictions: 1) it can only hold values of pointer
+type, and 2) it cannot hold a null pointer.
+
+Since this container is highly specialized, it is rarely used.
+
+.. _dss_smallvector:
+
+llvm/ADT/SmallVector.h
+^^^^^^^^^^^^^^^^^^^^^^
+
+``SmallVector<Type, N>`` is a simple class that looks and smells just like
+``vector<Type>``: it supports efficient iteration, lays out elements in memory
+order (so you can do pointer arithmetic between elements), supports efficient
+push_back/pop_back operations, supports efficient random access to its elements,
+etc.
+
+The advantage of SmallVector is that it allocates space for some number of
+elements (N) **in the object itself**.  Because of this, if the SmallVector is
+dynamically smaller than N, no malloc is performed.  This can be a big win in
+cases where the malloc/free call is far more expensive than the code that
+fiddles around with the elements.
+
+This is good for vectors that are "usually small" (e.g. the number of
+predecessors/successors of a block is usually less than 8).  On the other hand,
+this makes the size of the SmallVector itself large, so you don't want to
+allocate lots of them (doing so will waste a lot of space).  As such,
+SmallVectors are most useful when on the stack.
+
+SmallVector also provides a nice portable and efficient replacement for
+``alloca``.
+
+.. _dss_vector:
+
+<vector>
+^^^^^^^^
+
+``std::vector`` is well loved and respected.  It is useful when SmallVector
+isn't: when the size of the vector is often large (thus the small optimization
+will rarely be a benefit) or if you will be allocating many instances of the
+vector itself (which would waste space for elements that aren't in the
+container).  vector is also useful when interfacing with code that expects
+vectors :).
+
+One worthwhile note about std::vector: avoid code like this:
+
+.. code-block:: c++
+
+  for ( ... ) {
+     std::vector<foo> V;
+     // make use of V.
+  }
+
+Instead, write this as:
+
+.. code-block:: c++
+
+  std::vector<foo> V;
+  for ( ... ) {
+     // make use of V.
+     V.clear();
+  }
+
+Doing so will save (at least) one heap allocation and free per iteration of the
+loop.
+
+.. _dss_deque:
+
+<deque>
+^^^^^^^
+
+``std::deque`` is, in some senses, a generalized version of ``std::vector``.
+Like ``std::vector``, it provides constant time random access and other similar
+properties, but it also provides efficient access to the front of the list.  It
+does not guarantee continuity of elements within memory.
+
+In exchange for this extra flexibility, ``std::deque`` has significantly higher
+constant factor costs than ``std::vector``.  If possible, use ``std::vector`` or
+something cheaper.
+
+.. _dss_list:
+
+<list>
+^^^^^^
+
+``std::list`` is an extremely inefficient class that is rarely useful.  It
+performs a heap allocation for every element inserted into it, thus having an
+extremely high constant factor, particularly for small data types.
+``std::list`` also only supports bidirectional iteration, not random access
+iteration.
+
+In exchange for this high cost, std::list supports efficient access to both ends
+of the list (like ``std::deque``, but unlike ``std::vector`` or
+``SmallVector``).  In addition, the iterator invalidation characteristics of
+std::list are stronger than that of a vector class: inserting or removing an
+element into the list does not invalidate iterator or pointers to other elements
+in the list.
+
+.. _dss_ilist:
+
+llvm/ADT/ilist.h
+^^^^^^^^^^^^^^^^
+
+``ilist<T>`` implements an 'intrusive' doubly-linked list.  It is intrusive,
+because it requires the element to store and provide access to the prev/next
+pointers for the list.
+
+``ilist`` has the same drawbacks as ``std::list``, and additionally requires an
+``ilist_traits`` implementation for the element type, but it provides some novel
+characteristics.  In particular, it can efficiently store polymorphic objects,
+the traits class is informed when an element is inserted or removed from the
+list, and ``ilist``\ s are guaranteed to support a constant-time splice
+operation.
+
+These properties are exactly what we want for things like ``Instruction``\ s and
+basic blocks, which is why these are implemented with ``ilist``\ s.
+
+Related classes of interest are explained in the following subsections:
+
+* :ref:`ilist_traits <dss_ilist_traits>`
+
+* :ref:`iplist <dss_iplist>`
+
+* :ref:`llvm/ADT/ilist_node.h <dss_ilist_node>`
+
+* :ref:`Sentinels <dss_ilist_sentinel>`
+
+.. _dss_packedvector:
+
+llvm/ADT/PackedVector.h
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Useful for storing a vector of values using only a few number of bits for each
+value.  Apart from the standard operations of a vector-like container, it can
+also perform an 'or' set operation.
+
+For example:
+
+.. code-block:: c++
+
+  enum State {
+      None = 0x0,
+      FirstCondition = 0x1,
+      SecondCondition = 0x2,
+      Both = 0x3
+  };
+
+  State get() {
+      PackedVector<State, 2> Vec1;
+      Vec1.push_back(FirstCondition);
+
+      PackedVector<State, 2> Vec2;
+      Vec2.push_back(SecondCondition);
+
+      Vec1 |= Vec2;
+      return Vec1[0]; // returns 'Both'.
+  }
+
+.. _dss_ilist_traits:
+
+ilist_traits
+^^^^^^^^^^^^
+
+``ilist_traits<T>`` is ``ilist<T>``'s customization mechanism. ``iplist<T>``
+(and consequently ``ilist<T>``) publicly derive from this traits class.
+
+.. _dss_iplist:
+
+iplist
+^^^^^^
+
+``iplist<T>`` is ``ilist<T>``'s base and as such supports a slightly narrower
+interface.  Notably, inserters from ``T&`` are absent.
+
+``ilist_traits<T>`` is a public base of this class and can be used for a wide
+variety of customizations.
+
+.. _dss_ilist_node:
+
+llvm/ADT/ilist_node.h
+^^^^^^^^^^^^^^^^^^^^^
+
+``ilist_node<T>`` implements a the forward and backward links that are expected
+by the ``ilist<T>`` (and analogous containers) in the default manner.
+
+``ilist_node<T>``\ s are meant to be embedded in the node type ``T``, usually
+``T`` publicly derives from ``ilist_node<T>``.
+
+.. _dss_ilist_sentinel:
+
+Sentinels
+^^^^^^^^^
+
+``ilist``\ s have another specialty that must be considered.  To be a good
+citizen in the C++ ecosystem, it needs to support the standard container
+operations, such as ``begin`` and ``end`` iterators, etc.  Also, the
+``operator--`` must work correctly on the ``end`` iterator in the case of
+non-empty ``ilist``\ s.
+
+The only sensible solution to this problem is to allocate a so-called *sentinel*
+along with the intrusive list, which serves as the ``end`` iterator, providing
+the back-link to the last element.  However conforming to the C++ convention it
+is illegal to ``operator++`` beyond the sentinel and it also must not be
+dereferenced.
+
+These constraints allow for some implementation freedom to the ``ilist`` how to
+allocate and store the sentinel.  The corresponding policy is dictated by
+``ilist_traits<T>``.  By default a ``T`` gets heap-allocated whenever the need
+for a sentinel arises.
+
+While the default policy is sufficient in most cases, it may break down when
+``T`` does not provide a default constructor.  Also, in the case of many
+instances of ``ilist``\ s, the memory overhead of the associated sentinels is
+wasted.  To alleviate the situation with numerous and voluminous
+``T``-sentinels, sometimes a trick is employed, leading to *ghostly sentinels*.
+
+Ghostly sentinels are obtained by specially-crafted ``ilist_traits<T>`` which
+superpose the sentinel with the ``ilist`` instance in memory.  Pointer
+arithmetic is used to obtain the sentinel, which is relative to the ``ilist``'s
+``this`` pointer.  The ``ilist`` is augmented by an extra pointer, which serves
+as the back-link of the sentinel.  This is the only field in the ghostly
+sentinel which can be legally accessed.
+
+.. _dss_other:
+
+Other Sequential Container options
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Other STL containers are available, such as ``std::string``.
+
+There are also various STL adapter classes such as ``std::queue``,
+``std::priority_queue``, ``std::stack``, etc.  These provide simplified access
+to an underlying container but don't affect the cost of the container itself.
+
+.. _ds_string:
+
+String-like containers
+----------------------
+
+There are a variety of ways to pass around and use strings in C and C++, and
+LLVM adds a few new options to choose from.  Pick the first option on this list
+that will do what you need, they are ordered according to their relative cost.
+
+Note that is is generally preferred to *not* pass strings around as ``const
+char*``'s.  These have a number of problems, including the fact that they
+cannot represent embedded nul ("\0") characters, and do not have a length
+available efficiently.  The general replacement for '``const char*``' is
+StringRef.
+
+For more information on choosing string containers for APIs, please see
+:ref:`Passing Strings <string_apis>`.
+
+.. _dss_stringref:
+
+llvm/ADT/StringRef.h
+^^^^^^^^^^^^^^^^^^^^
+
+The StringRef class is a simple value class that contains a pointer to a
+character and a length, and is quite related to the :ref:`ArrayRef
+<dss_arrayref>` class (but specialized for arrays of characters).  Because
+StringRef carries a length with it, it safely handles strings with embedded nul
+characters in it, getting the length does not require a strlen call, and it even
+has very convenient APIs for slicing and dicing the character range that it
+represents.
+
+StringRef is ideal for passing simple strings around that are known to be live,
+either because they are C string literals, std::string, a C array, or a
+SmallVector.  Each of these cases has an efficient implicit conversion to
+StringRef, which doesn't result in a dynamic strlen being executed.
+
+StringRef has a few major limitations which make more powerful string containers
+useful:
+
+#. You cannot directly convert a StringRef to a 'const char*' because there is
+   no way to add a trailing nul (unlike the .c_str() method on various stronger
+   classes).
+
+#. StringRef doesn't own or keep alive the underlying string bytes.
+   As such it can easily lead to dangling pointers, and is not suitable for
+   embedding in datastructures in most cases (instead, use an std::string or
+   something like that).
+
+#. For the same reason, StringRef cannot be used as the return value of a
+   method if the method "computes" the result string.  Instead, use std::string.
+
+#. StringRef's do not allow you to mutate the pointed-to string bytes and it
+   doesn't allow you to insert or remove bytes from the range.  For editing
+   operations like this, it interoperates with the :ref:`Twine <dss_twine>`
+   class.
+
+Because of its strengths and limitations, it is very common for a function to
+take a StringRef and for a method on an object to return a StringRef that points
+into some string that it owns.
+
+.. _dss_twine:
+
+llvm/ADT/Twine.h
+^^^^^^^^^^^^^^^^
+
+The Twine class is used as an intermediary datatype for APIs that want to take a
+string that can be constructed inline with a series of concatenations.  Twine
+works by forming recursive instances of the Twine datatype (a simple value
+object) on the stack as temporary objects, linking them together into a tree
+which is then linearized when the Twine is consumed.  Twine is only safe to use
+as the argument to a function, and should always be a const reference, e.g.:
+
+.. code-block:: c++
+
+  void foo(const Twine &T);
+  ...
+  StringRef X = ...
+  unsigned i = ...
+  foo(X + "." + Twine(i));
+
+This example forms a string like "blarg.42" by concatenating the values
+together, and does not form intermediate strings containing "blarg" or "blarg.".
+
+Because Twine is constructed with temporary objects on the stack, and because
+these instances are destroyed at the end of the current statement, it is an
+inherently dangerous API.  For example, this simple variant contains undefined
+behavior and will probably crash:
+
+.. code-block:: c++
+
+  void foo(const Twine &T);
+  ...
+  StringRef X = ...
+  unsigned i = ...
+  const Twine &Tmp = X + "." + Twine(i);
+  foo(Tmp);
+
+... because the temporaries are destroyed before the call.  That said, Twine's
+are much more efficient than intermediate std::string temporaries, and they work
+really well with StringRef.  Just be aware of their limitations.
+
+.. _dss_smallstring:
+
+llvm/ADT/SmallString.h
+^^^^^^^^^^^^^^^^^^^^^^
+
+SmallString is a subclass of :ref:`SmallVector <dss_smallvector>` that adds some
+convenience APIs like += that takes StringRef's.  SmallString avoids allocating
+memory in the case when the preallocated space is enough to hold its data, and
+it calls back to general heap allocation when required.  Since it owns its data,
+it is very safe to use and supports full mutation of the string.
+
+Like SmallVector's, the big downside to SmallString is their sizeof.  While they
+are optimized for small strings, they themselves are not particularly small.
+This means that they work great for temporary scratch buffers on the stack, but
+should not generally be put into the heap: it is very rare to see a SmallString
+as the member of a frequently-allocated heap data structure or returned
+by-value.
+
+.. _dss_stdstring:
+
+std::string
+^^^^^^^^^^^
+
+The standard C++ std::string class is a very general class that (like
+SmallString) owns its underlying data.  sizeof(std::string) is very reasonable
+so it can be embedded into heap data structures and returned by-value.  On the
+other hand, std::string is highly inefficient for inline editing (e.g.
+concatenating a bunch of stuff together) and because it is provided by the
+standard library, its performance characteristics depend a lot of the host
+standard library (e.g. libc++ and MSVC provide a highly optimized string class,
+GCC contains a really slow implementation).
+
+The major disadvantage of std::string is that almost every operation that makes
+them larger can allocate memory, which is slow.  As such, it is better to use
+SmallVector or Twine as a scratch buffer, but then use std::string to persist
+the result.
+
+.. _ds_set:
+
+Set-Like Containers (std::set, SmallSet, SetVector, etc)
+--------------------------------------------------------
+
+Set-like containers are useful when you need to canonicalize multiple values
+into a single representation.  There are several different choices for how to do
+this, providing various trade-offs.
+
+.. _dss_sortedvectorset:
+
+A sorted 'vector'
+^^^^^^^^^^^^^^^^^
+
+If you intend to insert a lot of elements, then do a lot of queries, a great
+approach is to use a vector (or other sequential container) with
+std::sort+std::unique to remove duplicates.  This approach works really well if
+your usage pattern has these two distinct phases (insert then query), and can be
+coupled with a good choice of :ref:`sequential container <ds_sequential>`.
+
+This combination provides the several nice properties: the result data is
+contiguous in memory (good for cache locality), has few allocations, is easy to
+address (iterators in the final vector are just indices or pointers), and can be
+efficiently queried with a standard binary or radix search.
+
+.. _dss_smallset:
+
+llvm/ADT/SmallSet.h
+^^^^^^^^^^^^^^^^^^^
+
+If you have a set-like data structure that is usually small and whose elements
+are reasonably small, a ``SmallSet<Type, N>`` is a good choice.  This set has
+space for N elements in place (thus, if the set is dynamically smaller than N,
+no malloc traffic is required) and accesses them with a simple linear search.
+When the set grows beyond 'N' elements, it allocates a more expensive
+representation that guarantees efficient access (for most types, it falls back
+to std::set, but for pointers it uses something far better, :ref:`SmallPtrSet
+<dss_smallptrset>`.
+
+The magic of this class is that it handles small sets extremely efficiently, but
+gracefully handles extremely large sets without loss of efficiency.  The
+drawback is that the interface is quite small: it supports insertion, queries
+and erasing, but does not support iteration.
+
+.. _dss_smallptrset:
+
+llvm/ADT/SmallPtrSet.h
+^^^^^^^^^^^^^^^^^^^^^^
+
+SmallPtrSet has all the advantages of ``SmallSet`` (and a ``SmallSet`` of
+pointers is transparently implemented with a ``SmallPtrSet``), but also supports
+iterators.  If more than 'N' insertions are performed, a single quadratically
+probed hash table is allocated and grows as needed, providing extremely
+efficient access (constant time insertion/deleting/queries with low constant
+factors) and is very stingy with malloc traffic.
+
+Note that, unlike ``std::set``, the iterators of ``SmallPtrSet`` are invalidated
+whenever an insertion occurs.  Also, the values visited by the iterators are not
+visited in sorted order.
+
+.. _dss_denseset:
+
+llvm/ADT/DenseSet.h
+^^^^^^^^^^^^^^^^^^^
+
+DenseSet is a simple quadratically probed hash table.  It excels at supporting
+small values: it uses a single allocation to hold all of the pairs that are
+currently inserted in the set.  DenseSet is a great way to unique small values
+that are not simple pointers (use :ref:`SmallPtrSet <dss_smallptrset>` for
+pointers).  Note that DenseSet has the same requirements for the value type that
+:ref:`DenseMap <dss_densemap>` has.
+
+.. _dss_sparseset:
+
+llvm/ADT/SparseSet.h
+^^^^^^^^^^^^^^^^^^^^
+
+SparseSet holds a small number of objects identified by unsigned keys of
+moderate size.  It uses a lot of memory, but provides operations that are almost
+as fast as a vector.  Typical keys are physical registers, virtual registers, or
+numbered basic blocks.
+
+SparseSet is useful for algorithms that need very fast clear/find/insert/erase
+and fast iteration over small sets.  It is not intended for building composite
+data structures.
+
+.. _dss_FoldingSet:
+
+llvm/ADT/FoldingSet.h
+^^^^^^^^^^^^^^^^^^^^^
+
+FoldingSet is an aggregate class that is really good at uniquing
+expensive-to-create or polymorphic objects.  It is a combination of a chained
+hash table with intrusive links (uniqued objects are required to inherit from
+FoldingSetNode) that uses :ref:`SmallVector <dss_smallvector>` as part of its ID
+process.
+
+Consider a case where you want to implement a "getOrCreateFoo" method for a
+complex object (for example, a node in the code generator).  The client has a
+description of **what** it wants to generate (it knows the opcode and all the
+operands), but we don't want to 'new' a node, then try inserting it into a set
+only to find out it already exists, at which point we would have to delete it
+and return the node that already exists.
+
+To support this style of client, FoldingSet perform a query with a
+FoldingSetNodeID (which wraps SmallVector) that can be used to describe the
+element that we want to query for.  The query either returns the element
+matching the ID or it returns an opaque ID that indicates where insertion should
+take place.  Construction of the ID usually does not require heap traffic.
+
+Because FoldingSet uses intrusive links, it can support polymorphic objects in
+the set (for example, you can have SDNode instances mixed with LoadSDNodes).
+Because the elements are individually allocated, pointers to the elements are
+stable: inserting or removing elements does not invalidate any pointers to other
+elements.
+
+.. _dss_set:
+
+<set>
+^^^^^
+
+``std::set`` is a reasonable all-around set class, which is decent at many
+things but great at nothing.  std::set allocates memory for each element
+inserted (thus it is very malloc intensive) and typically stores three pointers
+per element in the set (thus adding a large amount of per-element space
+overhead).  It offers guaranteed log(n) performance, which is not particularly
+fast from a complexity standpoint (particularly if the elements of the set are
+expensive to compare, like strings), and has extremely high constant factors for
+lookup, insertion and removal.
+
+The advantages of std::set are that its iterators are stable (deleting or
+inserting an element from the set does not affect iterators or pointers to other
+elements) and that iteration over the set is guaranteed to be in sorted order.
+If the elements in the set are large, then the relative overhead of the pointers
+and malloc traffic is not a big deal, but if the elements of the set are small,
+std::set is almost never a good choice.
+
+.. _dss_setvector:
+
+llvm/ADT/SetVector.h
+^^^^^^^^^^^^^^^^^^^^
+
+LLVM's ``SetVector<Type>`` is an adapter class that combines your choice of a
+set-like container along with a :ref:`Sequential Container <ds_sequential>` The
+important property that this provides is efficient insertion with uniquing
+(duplicate elements are ignored) with iteration support.  It implements this by
+inserting elements into both a set-like container and the sequential container,
+using the set-like container for uniquing and the sequential container for
+iteration.
+
+The difference between SetVector and other sets is that the order of iteration
+is guaranteed to match the order of insertion into the SetVector.  This property
+is really important for things like sets of pointers.  Because pointer values
+are non-deterministic (e.g. vary across runs of the program on different
+machines), iterating over the pointers in the set will not be in a well-defined
+order.
+
+The drawback of SetVector is that it requires twice as much space as a normal
+set and has the sum of constant factors from the set-like container and the
+sequential container that it uses.  Use it **only** if you need to iterate over
+the elements in a deterministic order.  SetVector is also expensive to delete
+elements out of (linear time), unless you use it's "pop_back" method, which is
+faster.
+
+``SetVector`` is an adapter class that defaults to using ``std::vector`` and a
+size 16 ``SmallSet`` for the underlying containers, so it is quite expensive.
+However, ``"llvm/ADT/SetVector.h"`` also provides a ``SmallSetVector`` class,
+which defaults to using a ``SmallVector`` and ``SmallSet`` of a specified size.
+If you use this, and if your sets are dynamically smaller than ``N``, you will
+save a lot of heap traffic.
+
+.. _dss_uniquevector:
+
+llvm/ADT/UniqueVector.h
+^^^^^^^^^^^^^^^^^^^^^^^
+
+UniqueVector is similar to :ref:`SetVector <dss_setvector>` but it retains a
+unique ID for each element inserted into the set.  It internally contains a map
+and a vector, and it assigns a unique ID for each value inserted into the set.
+
+UniqueVector is very expensive: its cost is the sum of the cost of maintaining
+both the map and vector, it has high complexity, high constant factors, and
+produces a lot of malloc traffic.  It should be avoided.
+
+.. _dss_immutableset:
+
+llvm/ADT/ImmutableSet.h
+^^^^^^^^^^^^^^^^^^^^^^^
+
+ImmutableSet is an immutable (functional) set implementation based on an AVL
+tree.  Adding or removing elements is done through a Factory object and results
+in the creation of a new ImmutableSet object.  If an ImmutableSet already exists
+with the given contents, then the existing one is returned; equality is compared
+with a FoldingSetNodeID.  The time and space complexity of add or remove
+operations is logarithmic in the size of the original set.
+
+There is no method for returning an element of the set, you can only check for
+membership.
+
+.. _dss_otherset:
+
+Other Set-Like Container Options
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The STL provides several other options, such as std::multiset and the various
+"hash_set" like containers (whether from C++ TR1 or from the SGI library).  We
+never use hash_set and unordered_set because they are generally very expensive
+(each insertion requires a malloc) and very non-portable.
+
+std::multiset is useful if you're not interested in elimination of duplicates,
+but has all the drawbacks of std::set.  A sorted vector (where you don't delete
+duplicate entries) or some other approach is almost always better.
+
+.. _ds_map:
+
+Map-Like Containers (std::map, DenseMap, etc)
+---------------------------------------------
+
+Map-like containers are useful when you want to associate data to a key.  As
+usual, there are a lot of different ways to do this. :)
+
+.. _dss_sortedvectormap:
+
+A sorted 'vector'
+^^^^^^^^^^^^^^^^^
+
+If your usage pattern follows a strict insert-then-query approach, you can
+trivially use the same approach as :ref:`sorted vectors for set-like containers
+<dss_sortedvectorset>`.  The only difference is that your query function (which
+uses std::lower_bound to get efficient log(n) lookup) should only compare the
+key, not both the key and value.  This yields the same advantages as sorted
+vectors for sets.
+
+.. _dss_stringmap:
+
+llvm/ADT/StringMap.h
+^^^^^^^^^^^^^^^^^^^^
+
+Strings are commonly used as keys in maps, and they are difficult to support
+efficiently: they are variable length, inefficient to hash and compare when
+long, expensive to copy, etc.  StringMap is a specialized container designed to
+cope with these issues.  It supports mapping an arbitrary range of bytes to an
+arbitrary other object.
+
+The StringMap implementation uses a quadratically-probed hash table, where the
+buckets store a pointer to the heap allocated entries (and some other stuff).
+The entries in the map must be heap allocated because the strings are variable
+length.  The string data (key) and the element object (value) are stored in the
+same allocation with the string data immediately after the element object.
+This container guarantees the "``(char*)(&Value+1)``" points to the key string
+for a value.
+
+The StringMap is very fast for several reasons: quadratic probing is very cache
+efficient for lookups, the hash value of strings in buckets is not recomputed
+when looking up an element, StringMap rarely has to touch the memory for
+unrelated objects when looking up a value (even when hash collisions happen),
+hash table growth does not recompute the hash values for strings already in the
+table, and each pair in the map is store in a single allocation (the string data
+is stored in the same allocation as the Value of a pair).
+
+StringMap also provides query methods that take byte ranges, so it only ever
+copies a string if a value is inserted into the table.
+
+StringMap iteratation order, however, is not guaranteed to be deterministic, so
+any uses which require that should instead use a std::map.
+
+.. _dss_indexmap:
+
+llvm/ADT/IndexedMap.h
+^^^^^^^^^^^^^^^^^^^^^
+
+IndexedMap is a specialized container for mapping small dense integers (or
+values that can be mapped to small dense integers) to some other type.  It is
+internally implemented as a vector with a mapping function that maps the keys
+to the dense integer range.
+
+This is useful for cases like virtual registers in the LLVM code generator: they
+have a dense mapping that is offset by a compile-time constant (the first
+virtual register ID).
+
+.. _dss_densemap:
+
+llvm/ADT/DenseMap.h
+^^^^^^^^^^^^^^^^^^^
+
+DenseMap is a simple quadratically probed hash table.  It excels at supporting
+small keys and values: it uses a single allocation to hold all of the pairs
+that are currently inserted in the map.  DenseMap is a great way to map
+pointers to pointers, or map other small types to each other.
+
+There are several aspects of DenseMap that you should be aware of, however.
+The iterators in a DenseMap are invalidated whenever an insertion occurs,
+unlike map.  Also, because DenseMap allocates space for a large number of
+key/value pairs (it starts with 64 by default), it will waste a lot of space if
+your keys or values are large.  Finally, you must implement a partial
+specialization of DenseMapInfo for the key that you want, if it isn't already
+supported.  This is required to tell DenseMap about two special marker values
+(which can never be inserted into the map) that it needs internally.
+
+DenseMap's find_as() method supports lookup operations using an alternate key
+type.  This is useful in cases where the normal key type is expensive to
+construct, but cheap to compare against.  The DenseMapInfo is responsible for
+defining the appropriate comparison and hashing methods for each alternate key
+type used.
+
+.. _dss_valuemap:
+
+llvm/ADT/ValueMap.h
+^^^^^^^^^^^^^^^^^^^
+
+ValueMap is a wrapper around a :ref:`DenseMap <dss_densemap>` mapping
+``Value*``\ s (or subclasses) to another type.  When a Value is deleted or
+RAUW'ed, ValueMap will update itself so the new version of the key is mapped to
+the same value, just as if the key were a WeakVH.  You can configure exactly how
+this happens, and what else happens on these two events, by passing a ``Config``
+parameter to the ValueMap template.
+
+.. _dss_intervalmap:
+
+llvm/ADT/IntervalMap.h
+^^^^^^^^^^^^^^^^^^^^^^
+
+IntervalMap is a compact map for small keys and values.  It maps key intervals
+instead of single keys, and it will automatically coalesce adjacent intervals.
+When then map only contains a few intervals, they are stored in the map object
+itself to avoid allocations.
+
+The IntervalMap iterators are quite big, so they should not be passed around as
+STL iterators.  The heavyweight iterators allow a smaller data structure.
+
+.. _dss_map:
+
+<map>
+^^^^^
+
+std::map has similar characteristics to :ref:`std::set <dss_set>`: it uses a
+single allocation per pair inserted into the map, it offers log(n) lookup with
+an extremely large constant factor, imposes a space penalty of 3 pointers per
+pair in the map, etc.
+
+std::map is most useful when your keys or values are very large, if you need to
+iterate over the collection in sorted order, or if you need stable iterators
+into the map (i.e. they don't get invalidated if an insertion or deletion of
+another element takes place).
+
+.. _dss_mapvector:
+
+llvm/ADT/MapVector.h
+^^^^^^^^^^^^^^^^^^^^
+
+``MapVector<KeyT,ValueT>`` provides a subset of the DenseMap interface.  The
+main difference is that the iteration order is guaranteed to be the insertion
+order, making it an easy (but somewhat expensive) solution for non-deterministic
+iteration over maps of pointers.
+
+It is implemented by mapping from key to an index in a vector of key,value
+pairs.  This provides fast lookup and iteration, but has two main drawbacks: The
+key is stored twice and it doesn't support removing elements.
+
+.. _dss_inteqclasses:
+
+llvm/ADT/IntEqClasses.h
+^^^^^^^^^^^^^^^^^^^^^^^
+
+IntEqClasses provides a compact representation of equivalence classes of small
+integers.  Initially, each integer in the range 0..n-1 has its own equivalence
+class.  Classes can be joined by passing two class representatives to the
+join(a, b) method.  Two integers are in the same class when findLeader() returns
+the same representative.
+
+Once all equivalence classes are formed, the map can be compressed so each
+integer 0..n-1 maps to an equivalence class number in the range 0..m-1, where m
+is the total number of equivalence classes.  The map must be uncompressed before
+it can be edited again.
+
+.. _dss_immutablemap:
+
+llvm/ADT/ImmutableMap.h
+^^^^^^^^^^^^^^^^^^^^^^^
+
+ImmutableMap is an immutable (functional) map implementation based on an AVL
+tree.  Adding or removing elements is done through a Factory object and results
+in the creation of a new ImmutableMap object.  If an ImmutableMap already exists
+with the given key set, then the existing one is returned; equality is compared
+with a FoldingSetNodeID.  The time and space complexity of add or remove
+operations is logarithmic in the size of the original map.
+
+.. _dss_othermap:
+
+Other Map-Like Container Options
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The STL provides several other options, such as std::multimap and the various
+"hash_map" like containers (whether from C++ TR1 or from the SGI library).  We
+never use hash_set and unordered_set because they are generally very expensive
+(each insertion requires a malloc) and very non-portable.
+
+std::multimap is useful if you want to map a key to multiple values, but has all
+the drawbacks of std::map.  A sorted vector or some other approach is almost
+always better.
+
+.. _ds_bit:
+
+Bit storage containers (BitVector, SparseBitVector)
+---------------------------------------------------
+
+Unlike the other containers, there are only two bit storage containers, and
+choosing when to use each is relatively straightforward.
+
+One additional option is ``std::vector<bool>``: we discourage its use for two
+reasons 1) the implementation in many common compilers (e.g.  commonly
+available versions of GCC) is extremely inefficient and 2) the C++ standards
+committee is likely to deprecate this container and/or change it significantly
+somehow.  In any case, please don't use it.
+
+.. _dss_bitvector:
+
+BitVector
+^^^^^^^^^
+
+The BitVector container provides a dynamic size set of bits for manipulation.
+It supports individual bit setting/testing, as well as set operations.  The set
+operations take time O(size of bitvector), but operations are performed one word
+at a time, instead of one bit at a time.  This makes the BitVector very fast for
+set operations compared to other containers.  Use the BitVector when you expect
+the number of set bits to be high (i.e. a dense set).
+
+.. _dss_smallbitvector:
+
+SmallBitVector
+^^^^^^^^^^^^^^
+
+The SmallBitVector container provides the same interface as BitVector, but it is
+optimized for the case where only a small number of bits, less than 25 or so,
+are needed.  It also transparently supports larger bit counts, but slightly less
+efficiently than a plain BitVector, so SmallBitVector should only be used when
+larger counts are rare.
+
+At this time, SmallBitVector does not support set operations (and, or, xor), and
+its operator[] does not provide an assignable lvalue.
+
+.. _dss_sparsebitvector:
+
+SparseBitVector
+^^^^^^^^^^^^^^^
+
+The SparseBitVector container is much like BitVector, with one major difference:
+Only the bits that are set, are stored.  This makes the SparseBitVector much
+more space efficient than BitVector when the set is sparse, as well as making
+set operations O(number of set bits) instead of O(size of universe).  The
+downside to the SparseBitVector is that setting and testing of random bits is
+O(N), and on large SparseBitVectors, this can be slower than BitVector.  In our
+implementation, setting or testing bits in sorted order (either forwards or
+reverse) is O(1) worst case.  Testing and setting bits within 128 bits (depends
+on size) of the current bit is also O(1).  As a general statement,
+testing/setting bits in a SparseBitVector is O(distance away from last set bit).
+
+.. _common:
+
+Helpful Hints for Common Operations
+===================================
+
+This section describes how to perform some very simple transformations of LLVM
+code.  This is meant to give examples of common idioms used, showing the
+practical side of LLVM transformations.
+
+Because this is a "how-to" section, you should also read about the main classes
+that you will be working with.  The :ref:`Core LLVM Class Hierarchy Reference
+<coreclasses>` contains details and descriptions of the main classes that you
+should know about.
+
+.. _inspection:
+
+Basic Inspection and Traversal Routines
+---------------------------------------
+
+The LLVM compiler infrastructure have many different data structures that may be
+traversed.  Following the example of the C++ standard template library, the
+techniques used to traverse these various data structures are all basically the
+same.  For a enumerable sequence of values, the ``XXXbegin()`` function (or
+method) returns an iterator to the start of the sequence, the ``XXXend()``
+function returns an iterator pointing to one past the last valid element of the
+sequence, and there is some ``XXXiterator`` data type that is common between the
+two operations.
+
+Because the pattern for iteration is common across many different aspects of the
+program representation, the standard template library algorithms may be used on
+them, and it is easier to remember how to iterate.  First we show a few common
+examples of the data structures that need to be traversed.  Other data
+structures are traversed in very similar ways.
+
+.. _iterate_function:
+
+Iterating over the ``BasicBlock`` in a ``Function``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+It's quite common to have a ``Function`` instance that you'd like to transform
+in some way; in particular, you'd like to manipulate its ``BasicBlock``\ s.  To
+facilitate this, you'll need to iterate over all of the ``BasicBlock``\ s that
+constitute the ``Function``.  The following is an example that prints the name
+of a ``BasicBlock`` and the number of ``Instruction``\ s it contains:
+
+.. code-block:: c++
+
+  // func is a pointer to a Function instance
+  for (Function::iterator i = func->begin(), e = func->end(); i != e; ++i)
+    // Print out the name of the basic block if it has one, and then the
+    // number of instructions that it contains
+    errs() << "Basic block (name=" << i->getName() << ") has "
+               << i->size() << " instructions.\n";
+
+Note that i can be used as if it were a pointer for the purposes of invoking
+member functions of the ``Instruction`` class.  This is because the indirection
+operator is overloaded for the iterator classes.  In the above code, the
+expression ``i->size()`` is exactly equivalent to ``(*i).size()`` just like
+you'd expect.
+
+.. _iterate_basicblock:
+
+Iterating over the ``Instruction`` in a ``BasicBlock``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Just like when dealing with ``BasicBlock``\ s in ``Function``\ s, it's easy to
+iterate over the individual instructions that make up ``BasicBlock``\ s.  Here's
+a code snippet that prints out each instruction in a ``BasicBlock``:
+
+.. code-block:: c++
+
+  // blk is a pointer to a BasicBlock instance
+  for (BasicBlock::iterator i = blk->begin(), e = blk->end(); i != e; ++i)
+     // The next statement works since operator<<(ostream&,...)
+     // is overloaded for Instruction&
+     errs() << *i << "\n";
+
+
+However, this isn't really the best way to print out the contents of a
+``BasicBlock``!  Since the ostream operators are overloaded for virtually
+anything you'll care about, you could have just invoked the print routine on the
+basic block itself: ``errs() << *blk << "\n";``.
+
+.. _iterate_insiter:
+
+Iterating over the ``Instruction`` in a ``Function``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you're finding that you commonly iterate over a ``Function``'s
+``BasicBlock``\ s and then that ``BasicBlock``'s ``Instruction``\ s,
+``InstIterator`` should be used instead.  You'll need to include
+``llvm/Support/InstIterator.h`` (`doxygen
+<http://llvm.org/doxygen/InstIterator_8h-source.html>`__) and then instantiate
+``InstIterator``\ s explicitly in your code.  Here's a small example that shows
+how to dump all instructions in a function to the standard error stream:
+
+.. code-block:: c++
+
+  #include "llvm/Support/InstIterator.h"
+
+  // F is a pointer to a Function instance
+  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+    errs() << *I << "\n";
+
+Easy, isn't it?  You can also use ``InstIterator``\ s to fill a work list with
+its initial contents.  For example, if you wanted to initialize a work list to
+contain all instructions in a ``Function`` F, all you would need to do is
+something like:
+
+.. code-block:: c++
+
+  std::set<Instruction*> worklist;
+  // or better yet, SmallPtrSet<Instruction*, 64> worklist;
+
+  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+    worklist.insert(&*I);
+
+The STL set ``worklist`` would now contain all instructions in the ``Function``
+pointed to by F.
+
+.. _iterate_convert:
+
+Turning an iterator into a class pointer (and vice-versa)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes, it'll be useful to grab a reference (or pointer) to a class instance
+when all you've got at hand is an iterator.  Well, extracting a reference or a
+pointer from an iterator is very straight-forward.  Assuming that ``i`` is a
+``BasicBlock::iterator`` and ``j`` is a ``BasicBlock::const_iterator``:
+
+.. code-block:: c++
+
+  Instruction& inst = *i;   // Grab reference to instruction reference
+  Instruction* pinst = &*i; // Grab pointer to instruction reference
+  const Instruction& inst = *j;
+
+However, the iterators you'll be working with in the LLVM framework are special:
+they will automatically convert to a ptr-to-instance type whenever they need to.
+Instead of derferencing the iterator and then taking the address of the result,
+you can simply assign the iterator to the proper pointer type and you get the
+dereference and address-of operation as a result of the assignment (behind the
+scenes, this is a result of overloading casting mechanisms).  Thus the last line
+of the last example,
+
+.. code-block:: c++
+
+  Instruction *pinst = &*i;
+
+is semantically equivalent to
+
+.. code-block:: c++
+
+  Instruction *pinst = i;
+
+It's also possible to turn a class pointer into the corresponding iterator, and
+this is a constant time operation (very efficient).  The following code snippet
+illustrates use of the conversion constructors provided by LLVM iterators.  By
+using these, you can explicitly grab the iterator of something without actually
+obtaining it via iteration over some structure:
+
+.. code-block:: c++
+
+  void printNextInstruction(Instruction* inst) {
+    BasicBlock::iterator it(inst);
+    ++it; // After this line, it refers to the instruction after *inst
+    if (it != inst->getParent()->end()) errs() << *it << "\n";
+  }
+
+Unfortunately, these implicit conversions come at a cost; they prevent these
+iterators from conforming to standard iterator conventions, and thus from being
+usable with standard algorithms and containers.  For example, they prevent the
+following code, where ``B`` is a ``BasicBlock``, from compiling:
+
+.. code-block:: c++
+
+  llvm::SmallVector<llvm::Instruction *, 16>(B->begin(), B->end());
+
+Because of this, these implicit conversions may be removed some day, and
+``operator*`` changed to return a pointer instead of a reference.
+
+.. _iterate_complex:
+
+Finding call sites: a slightly more complex example
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Say that you're writing a FunctionPass and would like to count all the locations
+in the entire module (that is, across every ``Function``) where a certain
+function (i.e., some ``Function *``) is already in scope.  As you'll learn
+later, you may want to use an ``InstVisitor`` to accomplish this in a much more
+straight-forward manner, but this example will allow us to explore how you'd do
+it if you didn't have ``InstVisitor`` around.  In pseudo-code, this is what we
+want to do:
+
+.. code-block:: none
+
+  initialize callCounter to zero
+  for each Function f in the Module
+    for each BasicBlock b in f
+      for each Instruction i in b
+        if (i is a CallInst and calls the given function)
+          increment callCounter
+
+And the actual code is (remember, because we're writing a ``FunctionPass``, our
+``FunctionPass``-derived class simply has to override the ``runOnFunction``
+method):
+
+.. code-block:: c++
+
+  Function* targetFunc = ...;
+
+  class OurFunctionPass : public FunctionPass {
+    public:
+      OurFunctionPass(): callCounter(0) { }
+
+      virtual runOnFunction(Function& F) {
+        for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) {
+          for (BasicBlock::iterator i = b->begin(), ie = b->end(); i != ie; ++i) {
+            if (CallInst* callInst = dyn_cast<CallInst>(&*i)) {
+              // We know we've encountered a call instruction, so we
+              // need to determine if it's a call to the
+              // function pointed to by m_func or not.
+              if (callInst->getCalledFunction() == targetFunc)
+                ++callCounter;
+            }
+          }
+        }
+      }
+
+    private:
+      unsigned callCounter;
+  };
+
+.. _calls_and_invokes:
+
+Treating calls and invokes the same way
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You may have noticed that the previous example was a bit oversimplified in that
+it did not deal with call sites generated by 'invoke' instructions.  In this,
+and in other situations, you may find that you want to treat ``CallInst``\ s and
+``InvokeInst``\ s the same way, even though their most-specific common base
+class is ``Instruction``, which includes lots of less closely-related things.
+For these cases, LLVM provides a handy wrapper class called ``CallSite``
+(`doxygen <http://llvm.org/doxygen/classllvm_1_1CallSite.html>`__) It is
+essentially a wrapper around an ``Instruction`` pointer, with some methods that
+provide functionality common to ``CallInst``\ s and ``InvokeInst``\ s.
+
+This class has "value semantics": it should be passed by value, not by reference
+and it should not be dynamically allocated or deallocated using ``operator new``
+or ``operator delete``.  It is efficiently copyable, assignable and
+constructable, with costs equivalents to that of a bare pointer.  If you look at
+its definition, it has only a single pointer member.
+
+.. _iterate_chains:
+
+Iterating over def-use & use-def chains
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Frequently, we might have an instance of the ``Value`` class (`doxygen
+<http://llvm.org/doxygen/classllvm_1_1Value.html>`__) and we want to determine
+which ``User`` s use the ``Value``.  The list of all ``User``\ s of a particular
+``Value`` is called a *def-use* chain.  For example, let's say we have a
+``Function*`` named ``F`` to a particular function ``foo``.  Finding all of the
+instructions that *use* ``foo`` is as simple as iterating over the *def-use*
+chain of ``F``:
+
+.. code-block:: c++
+
+  Function *F = ...;
+
+  for (Value::use_iterator i = F->use_begin(), e = F->use_end(); i != e; ++i)
+    if (Instruction *Inst = dyn_cast<Instruction>(*i)) {
+      errs() << "F is used in instruction:\n";
+      errs() << *Inst << "\n";
+    }
+
+Note that dereferencing a ``Value::use_iterator`` is not a very cheap operation.
+Instead of performing ``*i`` above several times, consider doing it only once in
+the loop body and reusing its result.
+
+Alternatively, it's common to have an instance of the ``User`` Class (`doxygen
+<http://llvm.org/doxygen/classllvm_1_1User.html>`__) and need to know what
+``Value``\ s are used by it.  The list of all ``Value``\ s used by a ``User`` is
+known as a *use-def* chain.  Instances of class ``Instruction`` are common
+``User`` s, so we might want to iterate over all of the values that a particular
+instruction uses (that is, the operands of the particular ``Instruction``):
+
+.. code-block:: c++
+
+  Instruction *pi = ...;
+
+  for (User::op_iterator i = pi->op_begin(), e = pi->op_end(); i != e; ++i) {
+    Value *v = *i;
+    // ...
+  }
+
+Declaring objects as ``const`` is an important tool of enforcing mutation free
+algorithms (such as analyses, etc.).  For this purpose above iterators come in
+constant flavors as ``Value::const_use_iterator`` and
+``Value::const_op_iterator``.  They automatically arise when calling
+``use/op_begin()`` on ``const Value*``\ s or ``const User*``\ s respectively.
+Upon dereferencing, they return ``const Use*``\ s.  Otherwise the above patterns
+remain unchanged.
+
+.. _iterate_preds:
+
+Iterating over predecessors & successors of blocks
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Iterating over the predecessors and successors of a block is quite easy with the
+routines defined in ``"llvm/Support/CFG.h"``.  Just use code like this to
+iterate over all predecessors of BB:
+
+.. code-block:: c++
+
+  #include "llvm/Support/CFG.h"
+  BasicBlock *BB = ...;
+
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+    BasicBlock *Pred = *PI;
+    // ...
+  }
+
+Similarly, to iterate over successors use ``succ_iterator/succ_begin/succ_end``.
+
+.. _simplechanges:
+
+Making simple changes
+---------------------
+
+There are some primitive transformation operations present in the LLVM
+infrastructure that are worth knowing about.  When performing transformations,
+it's fairly common to manipulate the contents of basic blocks.  This section
+describes some of the common methods for doing so and gives example code.
+
+.. _schanges_creating:
+
+Creating and inserting new ``Instruction``\ s
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+*Instantiating Instructions*
+
+Creation of ``Instruction``\ s is straight-forward: simply call the constructor
+for the kind of instruction to instantiate and provide the necessary parameters.
+For example, an ``AllocaInst`` only *requires* a (const-ptr-to) ``Type``.  Thus:
+
+.. code-block:: c++
+
+  AllocaInst* ai = new AllocaInst(Type::Int32Ty);
+
+will create an ``AllocaInst`` instance that represents the allocation of one
+integer in the current stack frame, at run time.  Each ``Instruction`` subclass
+is likely to have varying default parameters which change the semantics of the
+instruction, so refer to the `doxygen documentation for the subclass of
+Instruction <http://llvm.org/doxygen/classllvm_1_1Instruction.html>`_ that
+you're interested in instantiating.
+
+*Naming values*
+
+It is very useful to name the values of instructions when you're able to, as
+this facilitates the debugging of your transformations.  If you end up looking
+at generated LLVM machine code, you definitely want to have logical names
+associated with the results of instructions!  By supplying a value for the
+``Name`` (default) parameter of the ``Instruction`` constructor, you associate a
+logical name with the result of the instruction's execution at run time.  For
+example, say that I'm writing a transformation that dynamically allocates space
+for an integer on the stack, and that integer is going to be used as some kind
+of index by some other code.  To accomplish this, I place an ``AllocaInst`` at
+the first point in the first ``BasicBlock`` of some ``Function``, and I'm
+intending to use it within the same ``Function``.  I might do:
+
+.. code-block:: c++
+
+  AllocaInst* pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
+
+where ``indexLoc`` is now the logical name of the instruction's execution value,
+which is a pointer to an integer on the run time stack.
+
+*Inserting instructions*
+
+There are essentially two ways to insert an ``Instruction`` into an existing
+sequence of instructions that form a ``BasicBlock``:
+
+* Insertion into an explicit instruction list
+
+  Given a ``BasicBlock* pb``, an ``Instruction* pi`` within that ``BasicBlock``,
+  and a newly-created instruction we wish to insert before ``*pi``, we do the
+  following:
+
+  .. code-block:: c++
+
+      BasicBlock *pb = ...;
+      Instruction *pi = ...;
+      Instruction *newInst = new Instruction(...);
+
+      pb->getInstList().insert(pi, newInst); // Inserts newInst before pi in pb
+
+  Appending to the end of a ``BasicBlock`` is so common that the ``Instruction``
+  class and ``Instruction``-derived classes provide constructors which take a
+  pointer to a ``BasicBlock`` to be appended to.  For example code that looked
+  like:
+
+  .. code-block:: c++
+
+    BasicBlock *pb = ...;
+    Instruction *newInst = new Instruction(...);
+
+    pb->getInstList().push_back(newInst); // Appends newInst to pb
+
+  becomes:
+
+  .. code-block:: c++
+
+    BasicBlock *pb = ...;
+    Instruction *newInst = new Instruction(..., pb);
+
+  which is much cleaner, especially if you are creating long instruction
+  streams.
+
+* Insertion into an implicit instruction list
+
+  ``Instruction`` instances that are already in ``BasicBlock``\ s are implicitly
+  associated with an existing instruction list: the instruction list of the
+  enclosing basic block.  Thus, we could have accomplished the same thing as the
+  above code without being given a ``BasicBlock`` by doing:
+
+  .. code-block:: c++
+
+    Instruction *pi = ...;
+    Instruction *newInst = new Instruction(...);
+
+    pi->getParent()->getInstList().insert(pi, newInst);
+
+  In fact, this sequence of steps occurs so frequently that the ``Instruction``
+  class and ``Instruction``-derived classes provide constructors which take (as
+  a default parameter) a pointer to an ``Instruction`` which the newly-created
+  ``Instruction`` should precede.  That is, ``Instruction`` constructors are
+  capable of inserting the newly-created instance into the ``BasicBlock`` of a
+  provided instruction, immediately before that instruction.  Using an
+  ``Instruction`` constructor with a ``insertBefore`` (default) parameter, the
+  above code becomes:
+
+  .. code-block:: c++
+
+    Instruction* pi = ...;
+    Instruction* newInst = new Instruction(..., pi);
+
+  which is much cleaner, especially if you're creating a lot of instructions and
+  adding them to ``BasicBlock``\ s.
+
+.. _schanges_deleting:
+
+Deleting Instructions
+^^^^^^^^^^^^^^^^^^^^^
+
+Deleting an instruction from an existing sequence of instructions that form a
+BasicBlock_ is very straight-forward: just call the instruction's
+``eraseFromParent()`` method.  For example:
+
+.. code-block:: c++
+
+  Instruction *I = .. ;
+  I->eraseFromParent();
+
+This unlinks the instruction from its containing basic block and deletes it.  If
+you'd just like to unlink the instruction from its containing basic block but
+not delete it, you can use the ``removeFromParent()`` method.
+
+.. _schanges_replacing:
+
+Replacing an Instruction with another Value
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Replacing individual instructions
+"""""""""""""""""""""""""""""""""
+
+Including "`llvm/Transforms/Utils/BasicBlockUtils.h
+<http://llvm.org/doxygen/BasicBlockUtils_8h-source.html>`_" permits use of two
+very useful replace functions: ``ReplaceInstWithValue`` and
+``ReplaceInstWithInst``.
+
+.. _schanges_deleting_sub:
+
+Deleting Instructions
+"""""""""""""""""""""
+
+* ``ReplaceInstWithValue``
+
+  This function replaces all uses of a given instruction with a value, and then
+  removes the original instruction.  The following example illustrates the
+  replacement of the result of a particular ``AllocaInst`` that allocates memory
+  for a single integer with a null pointer to an integer.
+
+  .. code-block:: c++
+
+    AllocaInst* instToReplace = ...;
+    BasicBlock::iterator ii(instToReplace);
+
+    ReplaceInstWithValue(instToReplace->getParent()->getInstList(), ii,
+                         Constant::getNullValue(PointerType::getUnqual(Type::Int32Ty)));
+
+* ``ReplaceInstWithInst``
+
+  This function replaces a particular instruction with another instruction,
+  inserting the new instruction into the basic block at the location where the
+  old instruction was, and replacing any uses of the old instruction with the
+  new instruction.  The following example illustrates the replacement of one
+  ``AllocaInst`` with another.
+
+  .. code-block:: c++
+
+    AllocaInst* instToReplace = ...;
+    BasicBlock::iterator ii(instToReplace);
+
+    ReplaceInstWithInst(instToReplace->getParent()->getInstList(), ii,
+                        new AllocaInst(Type::Int32Ty, 0, "ptrToReplacedInt"));
+
+
+Replacing multiple uses of Users and Values
+"""""""""""""""""""""""""""""""""""""""""""
+
+You can use ``Value::replaceAllUsesWith`` and ``User::replaceUsesOfWith`` to
+change more than one use at a time.  See the doxygen documentation for the
+`Value Class <http://llvm.org/doxygen/classllvm_1_1Value.html>`_ and `User Class
+<http://llvm.org/doxygen/classllvm_1_1User.html>`_, respectively, for more
+information.
+
+.. _schanges_deletingGV:
+
+Deleting GlobalVariables
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Deleting a global variable from a module is just as easy as deleting an
+Instruction.  First, you must have a pointer to the global variable that you
+wish to delete.  You use this pointer to erase it from its parent, the module.
+For example:
+
+.. code-block:: c++
+
+  GlobalVariable *GV = .. ;
+
+  GV->eraseFromParent();
+
+
+.. _create_types:
+
+How to Create Types
+-------------------
+
+In generating IR, you may need some complex types.  If you know these types
+statically, you can use ``TypeBuilder<...>::get()``, defined in
+``llvm/Support/TypeBuilder.h``, to retrieve them.  ``TypeBuilder`` has two forms
+depending on whether you're building types for cross-compilation or native
+library use.  ``TypeBuilder<T, true>`` requires that ``T`` be independent of the
+host environment, meaning that it's built out of types from the ``llvm::types``
+(`doxygen <http://llvm.org/doxygen/namespacellvm_1_1types.html>`__) namespace
+and pointers, functions, arrays, etc. built of those.  ``TypeBuilder<T, false>``
+additionally allows native C types whose size may depend on the host compiler.
+For example,
+
+.. code-block:: c++
+
+  FunctionType *ft = TypeBuilder<types::i<8>(types::i<32>*), true>::get();
+
+is easier to read and write than the equivalent
+
+.. code-block:: c++
+
+  std::vector<const Type*> params;
+  params.push_back(PointerType::getUnqual(Type::Int32Ty));
+  FunctionType *ft = FunctionType::get(Type::Int8Ty, params, false);
+
+See the `class comment
+<http://llvm.org/doxygen/TypeBuilder_8h-source.html#l00001>`_ for more details.
+
+.. _threading:
+
+Threads and LLVM
+================
+
+This section describes the interaction of the LLVM APIs with multithreading,
+both on the part of client applications, and in the JIT, in the hosted
+application.
+
+Note that LLVM's support for multithreading is still relatively young.  Up
+through version 2.5, the execution of threaded hosted applications was
+supported, but not threaded client access to the APIs.  While this use case is
+now supported, clients *must* adhere to the guidelines specified below to ensure
+proper operation in multithreaded mode.
+
+Note that, on Unix-like platforms, LLVM requires the presence of GCC's atomic
+intrinsics in order to support threaded operation.  If you need a
+multhreading-capable LLVM on a platform without a suitably modern system
+compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and
+using the resultant compiler to build a copy of LLVM with multithreading
+support.
+
+.. _startmultithreaded:
+
+Entering and Exiting Multithreaded Mode
+---------------------------------------
+
+In order to properly protect its internal data structures while avoiding
+excessive locking overhead in the single-threaded case, the LLVM must intialize
+certain data structures necessary to provide guards around its internals.  To do
+so, the client program must invoke ``llvm_start_multithreaded()`` before making
+any concurrent LLVM API calls.  To subsequently tear down these structures, use
+the ``llvm_stop_multithreaded()`` call.  You can also use the
+``llvm_is_multithreaded()`` call to check the status of multithreaded mode.
+
+Note that both of these calls must be made *in isolation*.  That is to say that
+no other LLVM API calls may be executing at any time during the execution of
+``llvm_start_multithreaded()`` or ``llvm_stop_multithreaded``.  It's is the
+client's responsibility to enforce this isolation.
+
+The return value of ``llvm_start_multithreaded()`` indicates the success or
+failure of the initialization.  Failure typically indicates that your copy of
+LLVM was built without multithreading support, typically because GCC atomic
+intrinsics were not found in your system compiler.  In this case, the LLVM API
+will not be safe for concurrent calls.  However, it *will* be safe for hosting
+threaded applications in the JIT, though :ref:`care must be taken
+<jitthreading>` to ensure that side exits and the like do not accidentally
+result in concurrent LLVM API calls.
+
+.. _shutdown:
+
+Ending Execution with ``llvm_shutdown()``
+-----------------------------------------
+
+When you are done using the LLVM APIs, you should call ``llvm_shutdown()`` to
+deallocate memory used for internal structures.  This will also invoke
+``llvm_stop_multithreaded()`` if LLVM is operating in multithreaded mode.  As
+such, ``llvm_shutdown()`` requires the same isolation guarantees as
+``llvm_stop_multithreaded()``.
+
+Note that, if you use scope-based shutdown, you can use the
+``llvm_shutdown_obj`` class, which calls ``llvm_shutdown()`` in its destructor.
+
+.. _managedstatic:
+
+Lazy Initialization with ``ManagedStatic``
+------------------------------------------
+
+``ManagedStatic`` is a utility class in LLVM used to implement static
+initialization of static resources, such as the global type tables.  Before the
+invocation of ``llvm_shutdown()``, it implements a simple lazy initialization
+scheme.  Once ``llvm_start_multithreaded()`` returns, however, it uses
+double-checked locking to implement thread-safe lazy initialization.
+
+Note that, because no other threads are allowed to issue LLVM API calls before
+``llvm_start_multithreaded()`` returns, it is possible to have
+``ManagedStatic``\ s of ``llvm::sys::Mutex``\ s.
+
+The ``llvm_acquire_global_lock()`` and ``llvm_release_global_lock`` APIs provide
+access to the global lock used to implement the double-checked locking for lazy
+initialization.  These should only be used internally to LLVM, and only if you
+know what you're doing!
+
+.. _llvmcontext:
+
+Achieving Isolation with ``LLVMContext``
+----------------------------------------
+
+``LLVMContext`` is an opaque class in the LLVM API which clients can use to
+operate multiple, isolated instances of LLVM concurrently within the same
+address space.  For instance, in a hypothetical compile-server, the compilation
+of an individual translation unit is conceptually independent from all the
+others, and it would be desirable to be able to compile incoming translation
+units concurrently on independent server threads.  Fortunately, ``LLVMContext``
+exists to enable just this kind of scenario!
+
+Conceptually, ``LLVMContext`` provides isolation.  Every LLVM entity
+(``Module``\ s, ``Value``\ s, ``Type``\ s, ``Constant``\ s, etc.) in LLVM's
+in-memory IR belongs to an ``LLVMContext``.  Entities in different contexts
+*cannot* interact with each other: ``Module``\ s in different contexts cannot be
+linked together, ``Function``\ s cannot be added to ``Module``\ s in different
+contexts, etc.  What this means is that is is safe to compile on multiple
+threads simultaneously, as long as no two threads operate on entities within the
+same context.
+
+In practice, very few places in the API require the explicit specification of a
+``LLVMContext``, other than the ``Type`` creation/lookup APIs.  Because every
+``Type`` carries a reference to its owning context, most other entities can
+determine what context they belong to by looking at their own ``Type``.  If you
+are adding new entities to LLVM IR, please try to maintain this interface
+design.
+
+For clients that do *not* require the benefits of isolation, LLVM provides a
+convenience API ``getGlobalContext()``.  This returns a global, lazily
+initialized ``LLVMContext`` that may be used in situations where isolation is
+not a concern.
+
+.. _jitthreading:
+
+Threads and the JIT
+-------------------
+
+LLVM's "eager" JIT compiler is safe to use in threaded programs.  Multiple
+threads can call ``ExecutionEngine::getPointerToFunction()`` or
+``ExecutionEngine::runFunction()`` concurrently, and multiple threads can run
+code output by the JIT concurrently.  The user must still ensure that only one
+thread accesses IR in a given ``LLVMContext`` while another thread might be
+modifying it.  One way to do that is to always hold the JIT lock while accessing
+IR outside the JIT (the JIT *modifies* the IR by adding ``CallbackVH``\ s).
+Another way is to only call ``getPointerToFunction()`` from the
+``LLVMContext``'s thread.
+
+When the JIT is configured to compile lazily (using
+``ExecutionEngine::DisableLazyCompilation(false)``), there is currently a `race
+condition <http://llvm.org/bugs/show_bug.cgi?id=5184>`_ in updating call sites
+after a function is lazily-jitted.  It's still possible to use the lazy JIT in a
+threaded program if you ensure that only one thread at a time can call any
+particular lazy stub and that the JIT lock guards any IR access, but we suggest
+using only the eager JIT in threaded programs.
+
+.. _advanced:
+
+Advanced Topics
+===============
+
+This section describes some of the advanced or obscure API's that most clients
+do not need to be aware of.  These API's tend manage the inner workings of the
+LLVM system, and only need to be accessed in unusual circumstances.
+
+.. _SymbolTable:
+
+The ``ValueSymbolTable`` class
+------------------------------
+
+The ``ValueSymbolTable`` (`doxygen
+<http://llvm.org/doxygen/classllvm_1_1ValueSymbolTable.html>`__) class provides
+a symbol table that the :ref:`Function <c_Function>` and Module_ classes use for
+naming value definitions.  The symbol table can provide a name for any Value_.
+
+Note that the ``SymbolTable`` class should not be directly accessed by most
+clients.  It should only be used when iteration over the symbol table names
+themselves are required, which is very special purpose.  Note that not all LLVM
+Value_\ s have names, and those without names (i.e. they have an empty name) do
+not exist in the symbol table.
+
+Symbol tables support iteration over the values in the symbol table with
+``begin/end/iterator`` and supports querying to see if a specific name is in the
+symbol table (with ``lookup``).  The ``ValueSymbolTable`` class exposes no
+public mutator methods, instead, simply call ``setName`` on a value, which will
+autoinsert it into the appropriate symbol table.
+
+.. _UserLayout:
+
+The ``User`` and owned ``Use`` classes' memory layout
+-----------------------------------------------------
+
+The ``User`` (`doxygen <http://llvm.org/doxygen/classllvm_1_1User.html>`__)
+class provides a basis for expressing the ownership of ``User`` towards other
+`Value instance <http://llvm.org/doxygen/classllvm_1_1Value.html>`_\ s.  The
+``Use`` (`doxygen <http://llvm.org/doxygen/classllvm_1_1Use.html>`__) helper
+class is employed to do the bookkeeping and to facilitate *O(1)* addition and
+removal.
+
+.. _Use2User:
+
+Interaction and relationship between ``User`` and ``Use`` objects
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A subclass of ``User`` can choose between incorporating its ``Use`` objects or
+refer to them out-of-line by means of a pointer.  A mixed variant (some ``Use``
+s inline others hung off) is impractical and breaks the invariant that the
+``Use`` objects belonging to the same ``User`` form a contiguous array.
+
+We have 2 different layouts in the ``User`` (sub)classes:
+
+* Layout a)
+
+  The ``Use`` object(s) are inside (resp. at fixed offset) of the ``User``
+  object and there are a fixed number of them.
+
+* Layout b)
+
+  The ``Use`` object(s) are referenced by a pointer to an array from the
+  ``User`` object and there may be a variable number of them.
+
+As of v2.4 each layout still possesses a direct pointer to the start of the
+array of ``Use``\ s.  Though not mandatory for layout a), we stick to this
+redundancy for the sake of simplicity.  The ``User`` object also stores the
+number of ``Use`` objects it has. (Theoretically this information can also be
+calculated given the scheme presented below.)
+
+Special forms of allocation operators (``operator new``) enforce the following
+memory layouts:
+
+* Layout a) is modelled by prepending the ``User`` object by the ``Use[]``
+  array.
+
+  .. code-block:: none
+
+    ...---.---.---.---.-------...
+      | P | P | P | P | User
+    '''---'---'---'---'-------'''
+
+* Layout b) is modelled by pointing at the ``Use[]`` array.
+
+  .. code-block:: none
+
+    .-------...
+    | User
+    '-------'''
+        |
+        v
+        .---.---.---.---...
+        | P | P | P | P |
+        '---'---'---'---'''
+
+*(In the above figures* '``P``' *stands for the* ``Use**`` *that is stored in
+each* ``Use`` *object in the member* ``Use::Prev`` *)*
+
+.. _Waymarking:
+
+The waymarking algorithm
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Since the ``Use`` objects are deprived of the direct (back)pointer to their
+``User`` objects, there must be a fast and exact method to recover it.  This is
+accomplished by the following scheme:
+
+A bit-encoding in the 2 LSBits (least significant bits) of the ``Use::Prev``
+allows to find the start of the ``User`` object:
+
+* ``00`` –> binary digit 0
+
+* ``01`` –> binary digit 1
+
+* ``10`` –> stop and calculate (``s``)
+
+* ``11`` –> full stop (``S``)
+
+Given a ``Use*``, all we have to do is to walk till we get a stop and we either
+have a ``User`` immediately behind or we have to walk to the next stop picking
+up digits and calculating the offset:
+
+.. code-block:: none
+
+  .---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.----------------
+  | 1 | s | 1 | 0 | 1 | 0 | s | 1 | 1 | 0 | s | 1 | 1 | s | 1 | S | User (or User*)
+  '---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'----------------
+      |+15                |+10            |+6         |+3     |+1
+      |                   |               |           |       | __>
+      |                   |               |           | __________>
+      |                   |               | ______________________>
+      |                   | ______________________________________>
+      | __________________________________________________________>
+
+Only the significant number of bits need to be stored between the stops, so that
+the *worst case is 20 memory accesses* when there are 1000 ``Use`` objects
+associated with a ``User``.
+
+.. _ReferenceImpl:
+
+Reference implementation
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+The following literate Haskell fragment demonstrates the concept:
+
+.. code-block:: haskell
+
+  > import Test.QuickCheck
+  >
+  > digits :: Int -> [Char] -> [Char]
+  > digits 0 acc = '0' : acc
+  > digits 1 acc = '1' : acc
+  > digits n acc = digits (n `div` 2) $ digits (n `mod` 2) acc
+  >
+  > dist :: Int -> [Char] -> [Char]
+  > dist 0 [] = ['S']
+  > dist 0 acc = acc
+  > dist 1 acc = let r = dist 0 acc in 's' : digits (length r) r
+  > dist n acc = dist (n - 1) $ dist 1 acc
+  >
+  > takeLast n ss = reverse $ take n $ reverse ss
+  >
+  > test = takeLast 40 $ dist 20 []
+  >
+
+Printing <test> gives: ``"1s100000s11010s10100s1111s1010s110s11s1S"``
+
+The reverse algorithm computes the length of the string just by examining a
+certain prefix:
+
+.. code-block:: haskell
+
+  > pref :: [Char] -> Int
+  > pref "S" = 1
+  > pref ('s':'1':rest) = decode 2 1 rest
+  > pref (_:rest) = 1 + pref rest
+  >
+  > decode walk acc ('0':rest) = decode (walk + 1) (acc * 2) rest
+  > decode walk acc ('1':rest) = decode (walk + 1) (acc * 2 + 1) rest
+  > decode walk acc _ = walk + acc
+  >
+
+Now, as expected, printing <pref test> gives ``40``.
+
+We can *quickCheck* this with following property:
+
+.. code-block:: haskell
+
+  > testcase = dist 2000 []
+  > testcaseLength = length testcase
+  >
+  > identityProp n = n > 0 && n <= testcaseLength ==> length arr == pref arr
+  >     where arr = takeLast n testcase
+  >
+
+As expected <quickCheck identityProp> gives:
+
+::
+
+  *Main> quickCheck identityProp
+  OK, passed 100 tests.
+
+Let's be a bit more exhaustive:
+
+.. code-block:: haskell
+
+  >
+  > deepCheck p = check (defaultConfig { configMaxTest = 500 }) p
+  >
+
+And here is the result of <deepCheck identityProp>:
+
+::
+
+  *Main> deepCheck identityProp
+  OK, passed 500 tests.
+
+.. _Tagging:
+
+Tagging considerations
+^^^^^^^^^^^^^^^^^^^^^^
+
+To maintain the invariant that the 2 LSBits of each ``Use**`` in ``Use`` never
+change after being set up, setters of ``Use::Prev`` must re-tag the new
+``Use**`` on every modification.  Accordingly getters must strip the tag bits.
+
+For layout b) instead of the ``User`` we find a pointer (``User*`` with LSBit
+set).  Following this pointer brings us to the ``User``.  A portable trick
+ensures that the first bytes of ``User`` (if interpreted as a pointer) never has
+the LSBit set. (Portability is relying on the fact that all known compilers
+place the ``vptr`` in the first word of the instances.)
+
+.. _coreclasses:
+
+The Core LLVM Class Hierarchy Reference
+=======================================
+
+``#include "llvm/Type.h"``
+
+header source: `Type.h <http://llvm.org/doxygen/Type_8h-source.html>`_
+
+doxygen info: `Type Clases <http://llvm.org/doxygen/classllvm_1_1Type.html>`_
+
+The Core LLVM classes are the primary means of representing the program being
+inspected or transformed.  The core LLVM classes are defined in header files in
+the ``include/llvm/`` directory, and implemented in the ``lib/VMCore``
+directory.
+
+.. _Type:
+
+The Type class and Derived Types
+--------------------------------
+
+``Type`` is a superclass of all type classes.  Every ``Value`` has a ``Type``.
+``Type`` cannot be instantiated directly but only through its subclasses.
+Certain primitive types (``VoidType``, ``LabelType``, ``FloatType`` and
+``DoubleType``) have hidden subclasses.  They are hidden because they offer no
+useful functionality beyond what the ``Type`` class offers except to distinguish
+themselves from other subclasses of ``Type``.
+
+All other types are subclasses of ``DerivedType``.  Types can be named, but this
+is not a requirement.  There exists exactly one instance of a given shape at any
+one time.  This allows type equality to be performed with address equality of
+the Type Instance.  That is, given two ``Type*`` values, the types are identical
+if the pointers are identical.
+
+.. _m_Type:
+
+Important Public Methods
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ``bool isIntegerTy() const``: Returns true for any integer type.
+
+* ``bool isFloatingPointTy()``: Return true if this is one of the five
+  floating point types.
+
+* ``bool isSized()``: Return true if the type has known size.  Things
+  that don't have a size are abstract types, labels and void.
+
+.. _derivedtypes:
+
+Important Derived Types
+^^^^^^^^^^^^^^^^^^^^^^^
+
+``IntegerType``
+  Subclass of DerivedType that represents integer types of any bit width.  Any
+  bit width between ``IntegerType::MIN_INT_BITS`` (1) and
+  ``IntegerType::MAX_INT_BITS`` (~8 million) can be represented.
+
+  * ``static const IntegerType* get(unsigned NumBits)``: get an integer
+    type of a specific bit width.
+
+  * ``unsigned getBitWidth() const``: Get the bit width of an integer type.
+
+``SequentialType``
+  This is subclassed by ArrayType, PointerType and VectorType.
+
+  * ``const Type * getElementType() const``: Returns the type of each
+    of the elements in the sequential type.
+
+``ArrayType``
+  This is a subclass of SequentialType and defines the interface for array
+  types.
+
+  * ``unsigned getNumElements() const``: Returns the number of elements
+    in the array.
+
+``PointerType``
+  Subclass of SequentialType for pointer types.
+
+``VectorType``
+  Subclass of SequentialType for vector types.  A vector type is similar to an
+  ArrayType but is distinguished because it is a first class type whereas
+  ArrayType is not.  Vector types are used for vector operations and are usually
+  small vectors of of an integer or floating point type.
+
+``StructType``
+  Subclass of DerivedTypes for struct types.
+
+.. _FunctionType:
+
+``FunctionType``
+  Subclass of DerivedTypes for function types.
+
+  * ``bool isVarArg() const``: Returns true if it's a vararg function.
+
+  * ``const Type * getReturnType() const``: Returns the return type of the
+    function.
+
+  * ``const Type * getParamType (unsigned i)``: Returns the type of the ith
+    parameter.
+
+  * ``const unsigned getNumParams() const``: Returns the number of formal
+    parameters.
+
+.. _Module:
+
+The ``Module`` class
+--------------------
+
+``#include "llvm/Module.h"``
+
+header source: `Module.h <http://llvm.org/doxygen/Module_8h-source.html>`_
+
+doxygen info: `Module Class <http://llvm.org/doxygen/classllvm_1_1Module.html>`_
+
+The ``Module`` class represents the top level structure present in LLVM
+programs.  An LLVM module is effectively either a translation unit of the
+original program or a combination of several translation units merged by the
+linker.  The ``Module`` class keeps track of a list of :ref:`Function
+<c_Function>`\ s, a list of GlobalVariable_\ s, and a SymbolTable_.
+Additionally, it contains a few helpful member functions that try to make common
+operations easy.
+
+.. _m_Module:
+
+Important Public Members of the ``Module`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ``Module::Module(std::string name = "")``
+
+  Constructing a Module_ is easy.  You can optionally provide a name for it
+  (probably based on the name of the translation unit).
+
+* | ``Module::iterator`` - Typedef for function list iterator
+  | ``Module::const_iterator`` - Typedef for const_iterator.
+  | ``begin()``, ``end()``, ``size()``, ``empty()``
+
+  These are forwarding methods that make it easy to access the contents of a
+  ``Module`` object's :ref:`Function <c_Function>` list.
+
+* ``Module::FunctionListType &getFunctionList()``
+
+  Returns the list of :ref:`Function <c_Function>`\ s.  This is necessary to use
+  when you need to update the list or perform a complex action that doesn't have
+  a forwarding method.
+
+----------------
+
+* | ``Module::global_iterator`` - Typedef for global variable list iterator
+  | ``Module::const_global_iterator`` - Typedef for const_iterator.
+  | ``global_begin()``, ``global_end()``, ``global_size()``, ``global_empty()``
+
+  These are forwarding methods that make it easy to access the contents of a
+  ``Module`` object's GlobalVariable_ list.
+
+* ``Module::GlobalListType &getGlobalList()``
+
+  Returns the list of GlobalVariable_\ s.  This is necessary to use when you
+  need to update the list or perform a complex action that doesn't have a
+  forwarding method.
+
+----------------
+
+* ``SymbolTable *getSymbolTable()``
+
+  Return a reference to the SymbolTable_ for this ``Module``.
+
+----------------
+
+* ``Function *getFunction(StringRef Name) const``
+
+  Look up the specified function in the ``Module`` SymbolTable_.  If it does not
+  exist, return ``null``.
+
+* ``Function *getOrInsertFunction(const std::string &Name, const FunctionType
+  *T)``
+
+  Look up the specified function in the ``Module`` SymbolTable_.  If it does not
+  exist, add an external declaration for the function and return it.
+
+* ``std::string getTypeName(const Type *Ty)``
+
+  If there is at least one entry in the SymbolTable_ for the specified Type_,
+  return it.  Otherwise return the empty string.
+
+* ``bool addTypeName(const std::string &Name, const Type *Ty)``
+
+  Insert an entry in the SymbolTable_ mapping ``Name`` to ``Ty``.  If there is
+  already an entry for this name, true is returned and the SymbolTable_ is not
+  modified.
+
+.. _Value:
+
+The ``Value`` class
+-------------------
+
+``#include "llvm/Value.h"``
+
+header source: `Value.h <http://llvm.org/doxygen/Value_8h-source.html>`_
+
+doxygen info: `Value Class <http://llvm.org/doxygen/classllvm_1_1Value.html>`_
+
+The ``Value`` class is the most important class in the LLVM Source base.  It
+represents a typed value that may be used (among other things) as an operand to
+an instruction.  There are many different types of ``Value``\ s, such as
+Constant_\ s, Argument_\ s.  Even Instruction_\ s and :ref:`Function
+<c_Function>`\ s are ``Value``\ s.
+
+A particular ``Value`` may be used many times in the LLVM representation for a
+program.  For example, an incoming argument to a function (represented with an
+instance of the Argument_ class) is "used" by every instruction in the function
+that references the argument.  To keep track of this relationship, the ``Value``
+class keeps a list of all of the ``User``\ s that is using it (the User_ class
+is a base class for all nodes in the LLVM graph that can refer to ``Value``\ s).
+This use list is how LLVM represents def-use information in the program, and is
+accessible through the ``use_*`` methods, shown below.
+
+Because LLVM is a typed representation, every LLVM ``Value`` is typed, and this
+Type_ is available through the ``getType()`` method.  In addition, all LLVM
+values can be named.  The "name" of the ``Value`` is a symbolic string printed
+in the LLVM code:
+
+.. code-block:: llvm
+
+  %foo = add i32 1, 2
+
+.. _nameWarning:
+
+The name of this instruction is "foo". **NOTE** that the name of any value may
+be missing (an empty string), so names should **ONLY** be used for debugging
+(making the source code easier to read, debugging printouts), they should not be
+used to keep track of values or map between them.  For this purpose, use a
+``std::map`` of pointers to the ``Value`` itself instead.
+
+One important aspect of LLVM is that there is no distinction between an SSA
+variable and the operation that produces it.  Because of this, any reference to
+the value produced by an instruction (or the value available as an incoming
+argument, for example) is represented as a direct pointer to the instance of the
+class that represents this value.  Although this may take some getting used to,
+it simplifies the representation and makes it easier to manipulate.
+
+.. _m_Value:
+
+Important Public Members of the ``Value`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* | ``Value::use_iterator`` - Typedef for iterator over the use-list
+  | ``Value::const_use_iterator`` - Typedef for const_iterator over the
+    use-list
+  | ``unsigned use_size()`` - Returns the number of users of the value.
+  | ``bool use_empty()`` - Returns true if there are no users.
+  | ``use_iterator use_begin()`` - Get an iterator to the start of the
+    use-list.
+  | ``use_iterator use_end()`` - Get an iterator to the end of the use-list.
+  | ``User *use_back()`` - Returns the last element in the list.
+
+  These methods are the interface to access the def-use information in LLVM.
+  As with all other iterators in LLVM, the naming conventions follow the
+  conventions defined by the STL_.
+
+* ``Type *getType() const``
+  This method returns the Type of the Value.
+
+* | ``bool hasName() const``
+  | ``std::string getName() const``
+  | ``void setName(const std::string &Name)``
+
+  This family of methods is used to access and assign a name to a ``Value``, be
+  aware of the :ref:`precaution above <nameWarning>`.
+
+* ``void replaceAllUsesWith(Value *V)``
+
+  This method traverses the use list of a ``Value`` changing all User_\ s of the
+  current value to refer to "``V``" instead.  For example, if you detect that an
+  instruction always produces a constant value (for example through constant
+  folding), you can replace all uses of the instruction with the constant like
+  this:
+
+  .. code-block:: c++
+
+    Inst->replaceAllUsesWith(ConstVal);
+
+.. _User:
+
+The ``User`` class
+------------------
+
+``#include "llvm/User.h"``
+
+header source: `User.h <http://llvm.org/doxygen/User_8h-source.html>`_
+
+doxygen info: `User Class <http://llvm.org/doxygen/classllvm_1_1User.html>`_
+
+Superclass: Value_
+
+The ``User`` class is the common base class of all LLVM nodes that may refer to
+``Value``\ s.  It exposes a list of "Operands" that are all of the ``Value``\ s
+that the User is referring to.  The ``User`` class itself is a subclass of
+``Value``.
+
+The operands of a ``User`` point directly to the LLVM ``Value`` that it refers
+to.  Because LLVM uses Static Single Assignment (SSA) form, there can only be
+one definition referred to, allowing this direct connection.  This connection
+provides the use-def information in LLVM.
+
+.. _m_User:
+
+Important Public Members of the ``User`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``User`` class exposes the operand list in two ways: through an index access
+interface and through an iterator based interface.
+
+* | ``Value *getOperand(unsigned i)``
+  | ``unsigned getNumOperands()``
+
+  These two methods expose the operands of the ``User`` in a convenient form for
+  direct access.
+
+* | ``User::op_iterator`` - Typedef for iterator over the operand list
+  | ``op_iterator op_begin()`` - Get an iterator to the start of the operand
+    list.
+  | ``op_iterator op_end()`` - Get an iterator to the end of the operand list.
+
+  Together, these methods make up the iterator based interface to the operands
+  of a ``User``.
+
+
+.. _Instruction:
+
+The ``Instruction`` class
+-------------------------
+
+``#include "llvm/Instruction.h"``
+
+header source: `Instruction.h
+<http://llvm.org/doxygen/Instruction_8h-source.html>`_
+
+doxygen info: `Instruction Class
+<http://llvm.org/doxygen/classllvm_1_1Instruction.html>`_
+
+Superclasses: User_, Value_
+
+The ``Instruction`` class is the common base class for all LLVM instructions.
+It provides only a few methods, but is a very commonly used class.  The primary
+data tracked by the ``Instruction`` class itself is the opcode (instruction
+type) and the parent BasicBlock_ the ``Instruction`` is embedded into.  To
+represent a specific type of instruction, one of many subclasses of
+``Instruction`` are used.
+
+Because the ``Instruction`` class subclasses the User_ class, its operands can
+be accessed in the same way as for other ``User``\ s (with the
+``getOperand()``/``getNumOperands()`` and ``op_begin()``/``op_end()`` methods).
+An important file for the ``Instruction`` class is the ``llvm/Instruction.def``
+file.  This file contains some meta-data about the various different types of
+instructions in LLVM.  It describes the enum values that are used as opcodes
+(for example ``Instruction::Add`` and ``Instruction::ICmp``), as well as the
+concrete sub-classes of ``Instruction`` that implement the instruction (for
+example BinaryOperator_ and CmpInst_).  Unfortunately, the use of macros in this
+file confuses doxygen, so these enum values don't show up correctly in the
+`doxygen output <http://llvm.org/doxygen/classllvm_1_1Instruction.html>`_.
+
+.. _s_Instruction:
+
+Important Subclasses of the ``Instruction`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. _BinaryOperator:
+
+* ``BinaryOperator``
+
+  This subclasses represents all two operand instructions whose operands must be
+  the same type, except for the comparison instructions.
+
+.. _CastInst:
+
+* ``CastInst``
+  This subclass is the parent of the 12 casting instructions.  It provides
+  common operations on cast instructions.
+
+.. _CmpInst:
+
+* ``CmpInst``
+
+  This subclass respresents the two comparison instructions,
+  `ICmpInst <LangRef.html#i_icmp>`_ (integer opreands), and
+  `FCmpInst <LangRef.html#i_fcmp>`_ (floating point operands).
+
+.. _TerminatorInst:
+
+* ``TerminatorInst``
+
+  This subclass is the parent of all terminator instructions (those which can
+  terminate a block).
+
+.. _m_Instruction:
+
+Important Public Members of the ``Instruction`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ``BasicBlock *getParent()``
+
+  Returns the BasicBlock_ that this
+  ``Instruction`` is embedded into.
+
+* ``bool mayWriteToMemory()``
+
+  Returns true if the instruction writes to memory, i.e. it is a ``call``,
+  ``free``, ``invoke``, or ``store``.
+
+* ``unsigned getOpcode()``
+
+  Returns the opcode for the ``Instruction``.
+
+* ``Instruction *clone() const``
+
+  Returns another instance of the specified instruction, identical in all ways
+  to the original except that the instruction has no parent (i.e. it's not
+  embedded into a BasicBlock_), and it has no name.
+
+.. _Constant:
+
+The ``Constant`` class and subclasses
+-------------------------------------
+
+Constant represents a base class for different types of constants.  It is
+subclassed by ConstantInt, ConstantArray, etc. for representing the various
+types of Constants.  GlobalValue_ is also a subclass, which represents the
+address of a global variable or function.
+
+.. _s_Constant:
+
+Important Subclasses of Constant
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ConstantInt : This subclass of Constant represents an integer constant of
+  any width.
+
+  * ``const APInt& getValue() const``: Returns the underlying
+    value of this constant, an APInt value.
+
+  * ``int64_t getSExtValue() const``: Converts the underlying APInt value to an
+    int64_t via sign extension.  If the value (not the bit width) of the APInt
+    is too large to fit in an int64_t, an assertion will result.  For this
+    reason, use of this method is discouraged.
+
+  * ``uint64_t getZExtValue() const``: Converts the underlying APInt value
+    to a uint64_t via zero extension.  IF the value (not the bit width) of the
+    APInt is too large to fit in a uint64_t, an assertion will result.  For this
+    reason, use of this method is discouraged.
+
+  * ``static ConstantInt* get(const APInt& Val)``: Returns the ConstantInt
+    object that represents the value provided by ``Val``.  The type is implied
+    as the IntegerType that corresponds to the bit width of ``Val``.
+
+  * ``static ConstantInt* get(const Type *Ty, uint64_t Val)``: Returns the
+    ConstantInt object that represents the value provided by ``Val`` for integer
+    type ``Ty``.
+
+* ConstantFP : This class represents a floating point constant.
+
+  * ``double getValue() const``: Returns the underlying value of this constant.
+
+* ConstantArray : This represents a constant array.
+
+  * ``const std::vector<Use> &getValues() const``: Returns a vector of
+    component constants that makeup this array.
+
+* ConstantStruct : This represents a constant struct.
+
+  * ``const std::vector<Use> &getValues() const``: Returns a vector of
+    component constants that makeup this array.
+
+* GlobalValue : This represents either a global variable or a function.  In
+  either case, the value is a constant fixed address (after linking).
+
+.. _GlobalValue:
+
+The ``GlobalValue`` class
+-------------------------
+
+``#include "llvm/GlobalValue.h"``
+
+header source: `GlobalValue.h
+<http://llvm.org/doxygen/GlobalValue_8h-source.html>`_
+
+doxygen info: `GlobalValue Class
+<http://llvm.org/doxygen/classllvm_1_1GlobalValue.html>`_
+
+Superclasses: Constant_, User_, Value_
+
+Global values ( GlobalVariable_\ s or :ref:`Function <c_Function>`\ s) are the
+only LLVM values that are visible in the bodies of all :ref:`Function
+<c_Function>`\ s.  Because they are visible at global scope, they are also
+subject to linking with other globals defined in different translation units.
+To control the linking process, ``GlobalValue``\ s know their linkage rules.
+Specifically, ``GlobalValue``\ s know whether they have internal or external
+linkage, as defined by the ``LinkageTypes`` enumeration.
+
+If a ``GlobalValue`` has internal linkage (equivalent to being ``static`` in C),
+it is not visible to code outside the current translation unit, and does not
+participate in linking.  If it has external linkage, it is visible to external
+code, and does participate in linking.  In addition to linkage information,
+``GlobalValue``\ s keep track of which Module_ they are currently part of.
+
+Because ``GlobalValue``\ s are memory objects, they are always referred to by
+their **address**.  As such, the Type_ of a global is always a pointer to its
+contents.  It is important to remember this when using the ``GetElementPtrInst``
+instruction because this pointer must be dereferenced first.  For example, if
+you have a ``GlobalVariable`` (a subclass of ``GlobalValue)`` that is an array
+of 24 ints, type ``[24 x i32]``, then the ``GlobalVariable`` is a pointer to
+that array.  Although the address of the first element of this array and the
+value of the ``GlobalVariable`` are the same, they have different types.  The
+``GlobalVariable``'s type is ``[24 x i32]``.  The first element's type is
+``i32.`` Because of this, accessing a global value requires you to dereference
+the pointer with ``GetElementPtrInst`` first, then its elements can be accessed.
+This is explained in the `LLVM Language Reference Manual
+<LangRef.html#globalvars>`_.
+
+.. _m_GlobalValue:
+
+Important Public Members of the ``GlobalValue`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* | ``bool hasInternalLinkage() const``
+  | ``bool hasExternalLinkage() const``
+  | ``void setInternalLinkage(bool HasInternalLinkage)``
+
+  These methods manipulate the linkage characteristics of the ``GlobalValue``.
+
+* ``Module *getParent()``
+
+  This returns the Module_ that the
+  GlobalValue is currently embedded into.
+
+.. _c_Function:
+
+The ``Function`` class
+----------------------
+
+``#include "llvm/Function.h"``
+
+header source: `Function.h <http://llvm.org/doxygen/Function_8h-source.html>`_
+
+doxygen info: `Function Class
+<http://llvm.org/doxygen/classllvm_1_1Function.html>`_
+
+Superclasses: GlobalValue_, Constant_, User_, Value_
+
+The ``Function`` class represents a single procedure in LLVM.  It is actually
+one of the more complex classes in the LLVM hierarchy because it must keep track
+of a large amount of data.  The ``Function`` class keeps track of a list of
+BasicBlock_\ s, a list of formal Argument_\ s, and a SymbolTable_.
+
+The list of BasicBlock_\ s is the most commonly used part of ``Function``
+objects.  The list imposes an implicit ordering of the blocks in the function,
+which indicate how the code will be laid out by the backend.  Additionally, the
+first BasicBlock_ is the implicit entry node for the ``Function``.  It is not
+legal in LLVM to explicitly branch to this initial block.  There are no implicit
+exit nodes, and in fact there may be multiple exit nodes from a single
+``Function``.  If the BasicBlock_ list is empty, this indicates that the
+``Function`` is actually a function declaration: the actual body of the function
+hasn't been linked in yet.
+
+In addition to a list of BasicBlock_\ s, the ``Function`` class also keeps track
+of the list of formal Argument_\ s that the function receives.  This container
+manages the lifetime of the Argument_ nodes, just like the BasicBlock_ list does
+for the BasicBlock_\ s.
+
+The SymbolTable_ is a very rarely used LLVM feature that is only used when you
+have to look up a value by name.  Aside from that, the SymbolTable_ is used
+internally to make sure that there are not conflicts between the names of
+Instruction_\ s, BasicBlock_\ s, or Argument_\ s in the function body.
+
+Note that ``Function`` is a GlobalValue_ and therefore also a Constant_.  The
+value of the function is its address (after linking) which is guaranteed to be
+constant.
+
+.. _m_Function:
+
+Important Public Members of the ``Function``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ``Function(const FunctionType *Ty, LinkageTypes Linkage,
+  const std::string &N = "", Module* Parent = 0)``
+
+  Constructor used when you need to create new ``Function``\ s to add the
+  program.  The constructor must specify the type of the function to create and
+  what type of linkage the function should have.  The FunctionType_ argument
+  specifies the formal arguments and return value for the function.  The same
+  FunctionType_ value can be used to create multiple functions.  The ``Parent``
+  argument specifies the Module in which the function is defined.  If this
+  argument is provided, the function will automatically be inserted into that
+  module's list of functions.
+
+* ``bool isDeclaration()``
+
+  Return whether or not the ``Function`` has a body defined.  If the function is
+  "external", it does not have a body, and thus must be resolved by linking with
+  a function defined in a different translation unit.
+
+* | ``Function::iterator`` - Typedef for basic block list iterator
+  | ``Function::const_iterator`` - Typedef for const_iterator.
+  | ``begin()``, ``end()``, ``size()``, ``empty()``
+
+  These are forwarding methods that make it easy to access the contents of a
+  ``Function`` object's BasicBlock_ list.
+
+* ``Function::BasicBlockListType &getBasicBlockList()``
+
+  Returns the list of BasicBlock_\ s.  This is necessary to use when you need to
+  update the list or perform a complex action that doesn't have a forwarding
+  method.
+
+* | ``Function::arg_iterator`` - Typedef for the argument list iterator
+  | ``Function::const_arg_iterator`` - Typedef for const_iterator.
+  | ``arg_begin()``, ``arg_end()``, ``arg_size()``, ``arg_empty()``
+
+  These are forwarding methods that make it easy to access the contents of a
+  ``Function`` object's Argument_ list.
+
+* ``Function::ArgumentListType &getArgumentList()``
+
+  Returns the list of Argument_.  This is necessary to use when you need to
+  update the list or perform a complex action that doesn't have a forwarding
+  method.
+
+* ``BasicBlock &getEntryBlock()``
+
+  Returns the entry ``BasicBlock`` for the function.  Because the entry block
+  for the function is always the first block, this returns the first block of
+  the ``Function``.
+
+* | ``Type *getReturnType()``
+  | ``FunctionType *getFunctionType()``
+
+  This traverses the Type_ of the ``Function`` and returns the return type of
+  the function, or the FunctionType_ of the actual function.
+
+* ``SymbolTable *getSymbolTable()``
+
+  Return a pointer to the SymbolTable_ for this ``Function``.
+
+.. _GlobalVariable:
+
+The ``GlobalVariable`` class
+----------------------------
+
+``#include "llvm/GlobalVariable.h"``
+
+header source: `GlobalVariable.h
+<http://llvm.org/doxygen/GlobalVariable_8h-source.html>`_
+
+doxygen info: `GlobalVariable Class
+<http://llvm.org/doxygen/classllvm_1_1GlobalVariable.html>`_
+
+Superclasses: GlobalValue_, Constant_, User_, Value_
+
+Global variables are represented with the (surprise surprise) ``GlobalVariable``
+class.  Like functions, ``GlobalVariable``\ s are also subclasses of
+GlobalValue_, and as such are always referenced by their address (global values
+must live in memory, so their "name" refers to their constant address).  See
+GlobalValue_ for more on this.  Global variables may have an initial value
+(which must be a Constant_), and if they have an initializer, they may be marked
+as "constant" themselves (indicating that their contents never change at
+runtime).
+
+.. _m_GlobalVariable:
+
+Important Public Members of the ``GlobalVariable`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ``GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes &Linkage,
+  Constant *Initializer = 0, const std::string &Name = "", Module* Parent = 0)``
+
+  Create a new global variable of the specified type.  If ``isConstant`` is true
+  then the global variable will be marked as unchanging for the program.  The
+  Linkage parameter specifies the type of linkage (internal, external, weak,
+  linkonce, appending) for the variable.  If the linkage is InternalLinkage,
+  WeakAnyLinkage, WeakODRLinkage, LinkOnceAnyLinkage or LinkOnceODRLinkage, then
+  the resultant global variable will have internal linkage.  AppendingLinkage
+  concatenates together all instances (in different translation units) of the
+  variable into a single variable but is only applicable to arrays.  See the
+  `LLVM Language Reference <LangRef.html#modulestructure>`_ for further details
+  on linkage types.  Optionally an initializer, a name, and the module to put
+  the variable into may be specified for the global variable as well.
+
+* ``bool isConstant() const``
+
+  Returns true if this is a global variable that is known not to be modified at
+  runtime.
+
+* ``bool hasInitializer()``
+
+  Returns true if this ``GlobalVariable`` has an intializer.
+
+* ``Constant *getInitializer()``
+
+  Returns the initial value for a ``GlobalVariable``.  It is not legal to call
+  this method if there is no initializer.
+
+.. _BasicBlock:
+
+The ``BasicBlock`` class
+------------------------
+
+``#include "llvm/BasicBlock.h"``
+
+header source: `BasicBlock.h
+<http://llvm.org/doxygen/BasicBlock_8h-source.html>`_
+
+doxygen info: `BasicBlock Class
+<http://llvm.org/doxygen/classllvm_1_1BasicBlock.html>`_
+
+Superclass: Value_
+
+This class represents a single entry single exit section of the code, commonly
+known as a basic block by the compiler community.  The ``BasicBlock`` class
+maintains a list of Instruction_\ s, which form the body of the block.  Matching
+the language definition, the last element of this list of instructions is always
+a terminator instruction (a subclass of the TerminatorInst_ class).
+
+In addition to tracking the list of instructions that make up the block, the
+``BasicBlock`` class also keeps track of the :ref:`Function <c_Function>` that
+it is embedded into.
+
+Note that ``BasicBlock``\ s themselves are Value_\ s, because they are
+referenced by instructions like branches and can go in the switch tables.
+``BasicBlock``\ s have type ``label``.
+
+.. _m_BasicBlock:
+
+Important Public Members of the ``BasicBlock`` class
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+* ``BasicBlock(const std::string &Name = "", Function *Parent = 0)``
+
+  The ``BasicBlock`` constructor is used to create new basic blocks for
+  insertion into a function.  The constructor optionally takes a name for the
+  new block, and a :ref:`Function <c_Function>` to insert it into.  If the
+  ``Parent`` parameter is specified, the new ``BasicBlock`` is automatically
+  inserted at the end of the specified :ref:`Function <c_Function>`, if not
+  specified, the BasicBlock must be manually inserted into the :ref:`Function
+  <c_Function>`.
+
+* | ``BasicBlock::iterator`` - Typedef for instruction list iterator
+  | ``BasicBlock::const_iterator`` - Typedef for const_iterator.
+  | ``begin()``, ``end()``, ``front()``, ``back()``,
+    ``size()``, ``empty()``
+    STL-style functions for accessing the instruction list.
+
+  These methods and typedefs are forwarding functions that have the same
+  semantics as the standard library methods of the same names.  These methods
+  expose the underlying instruction list of a basic block in a way that is easy
+  to manipulate.  To get the full complement of container operations (including
+  operations to update the list), you must use the ``getInstList()`` method.
+
+* ``BasicBlock::InstListType &getInstList()``
+
+  This method is used to get access to the underlying container that actually
+  holds the Instructions.  This method must be used when there isn't a
+  forwarding function in the ``BasicBlock`` class for the operation that you
+  would like to perform.  Because there are no forwarding functions for
+  "updating" operations, you need to use this if you want to update the contents
+  of a ``BasicBlock``.
+
+* ``Function *getParent()``
+
+  Returns a pointer to :ref:`Function <c_Function>` the block is embedded into,
+  or a null pointer if it is homeless.
+
+* ``TerminatorInst *getTerminator()``
+
+  Returns a pointer to the terminator instruction that appears at the end of the
+  ``BasicBlock``.  If there is no terminator instruction, or if the last
+  instruction in the block is not a terminator, then a null pointer is returned.
+
+.. _Argument:
+
+The ``Argument`` class
+----------------------
+
+This subclass of Value defines the interface for incoming formal arguments to a
+function.  A Function maintains a list of its formal arguments.  An argument has
+a pointer to the parent Function.
+
+
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
deleted file mode 100644
index 31cb26ca02..0000000000
--- a/docs/ReleaseNotes.html
+++ /dev/null
@@ -1,854 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-  <title>LLVM 3.2 Release Notes</title>
-</head>
-<body>
-
-<h1>LLVM 3.2 Release Notes</h1>
-
-<div>
-<img style="float:right" src="http://llvm.org/img/DragonSmall.png"
-     width="136" height="136" alt="LLVM Dragon Logo">
-</div>
-
-<ol>
-  <li><a href="#intro">Introduction</a></li>
-  <li><a href="#subproj">Sub-project Status Update</a></li>
-  <li><a href="#externalproj">External Projects Using LLVM 3.2</a></li>
-  <li><a href="#whatsnew">What's New in LLVM?</a></li>
-  <li><a href="GettingStarted.html">Installation Instructions</a></li>
-  <li><a href="#knownproblems">Known Problems</a></li>
-  <li><a href="#additionalinfo">Additional Information</a></li>
-</ol>
-
-<div class="doc_author">
-  <p>Written by the <a href="http://llvm.org/">LLVM Team</a></p>
-</div>
-
-<h1 style="color:red">These are in-progress notes for the upcoming LLVM 3.2
-release.<br>
-You may prefer the
-<a href="http://llvm.org/releases/3.1/docs/ReleaseNotes.html">LLVM 3.1
-Release Notes</a>.</h1>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="intro">Introduction</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This document contains the release notes for the LLVM Compiler
-   Infrastructure, release 3.2.  Here we describe the status of LLVM, including
-   major improvements from the previous release, improvements in various
-   subprojects of LLVM, and some of the current users of the code.  All LLVM
-   releases may be downloaded from the <a href="http://llvm.org/releases/">LLVM
-   releases web site</a>.</p>
-
-<p>For more information about LLVM, including information about the latest
-   release, please check out the <a href="http://llvm.org/">main LLVM web
-   site</a>.  If you have questions or comments,
-   the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVM
-   Developer's Mailing List</a> is a good place to send them.</p>
-
-<p>Note that if you are reading this file from a Subversion checkout or the main
-   LLVM web page, this document applies to the <i>next</i> release, not the
-   current one.  To see the release notes for a specific release, please see the
-   <a href="http://llvm.org/releases/">releases page</a>.</p>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="subproj">Sub-project Status Update</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The LLVM 3.2 distribution currently consists of code from the core LLVM
-   repository, which roughly includes the LLVM optimizers, code generators and
-   supporting tools, and the Clang repository. In addition to this code, the
-   LLVM Project includes other sub-projects that are in development. Here we
-   include updates on these subprojects.</p>
-
-<!--=========================================================================-->
-<h3>
-<a name="clang">Clang: C/C++/Objective-C Frontend Toolkit</a>
-</h3>
-
-<div>
-
-<p><a href="http://clang.llvm.org/">Clang</a> is an LLVM front end for the C,
-   C++, and Objective-C languages. Clang aims to provide a better user
-   experience through expressive diagnostics, a high level of conformance to
-   language standards, fast compilation, and low memory use. Like LLVM, Clang
-   provides a modular, library-based architecture that makes it suitable for
-   creating or integrating with other development tools. Clang is considered a
-   production-quality compiler for C, Objective-C, C++ and Objective-C++ on x86
-   (32- and 64-bit), and for Darwin/ARM targets.</p>
-
-<p>In the LLVM 3.2 time-frame, the Clang team has made many improvements.
-   Highlights include:</p>
-<ul>
-  <li>...</li>
-</ul>
-
-<p>For more details about the changes to Clang since the 3.1 release, see the
-   <a href="http://clang.llvm.org/docs/ReleaseNotes.html">Clang release
-   notes.</a></p>
-
-<p>If Clang rejects your code but another compiler accepts it, please take a
-   look at the <a href="http://clang.llvm.org/compatibility.html">language
-   compatibility</a> guide to make sure this is not intentional or a known
-   issue.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="dragonegg">DragonEgg: GCC front-ends, LLVM back-end</a>
-</h3>
-
-<div>
-
-<p><a href="http://dragonegg.llvm.org/">DragonEgg</a> is a
-   <a href="http://gcc.gnu.org/wiki/plugins">gcc plugin</a> that replaces GCC's
-   optimizers and code generators with LLVM's. It works with gcc-4.5 and gcc-4.6
-   (and partially with gcc-4.7), can target the x86-32/x86-64 and ARM processor
-   families, and has been successfully used on the Darwin, FreeBSD, KFreeBSD,
-   Linux and OpenBSD platforms.  It fully supports Ada, C, C++ and Fortran.  It
-   has partial support for Go, Java, Obj-C and Obj-C++.</p>
-
-<p>The 3.2 release has the following notable changes:</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="compiler-rt">compiler-rt: Compiler Runtime Library</a>
-</h3>
-
-<div>
-
-<p>The new LLVM <a href="http://compiler-rt.llvm.org/">compiler-rt project</a>
-   is a simple library that provides an implementation of the low-level
-   target-specific hooks required by code generation and other runtime
-   components.  For example, when compiling for a 32-bit target, converting a
-   double to a 64-bit unsigned integer is compiled into a runtime call to the
-   <code>__fixunsdfdi</code> function. The compiler-rt library provides highly
-   optimized implementations of this and other low-level routines (some are 3x
-   faster than the equivalent libgcc routines).</p>
-
-<p>The 3.2 release has the following notable changes:</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="lldb">LLDB: Low Level Debugger</a>
-</h3>
-
-<div>
-
-<p><a href="http://lldb.llvm.org">LLDB</a> is a ground-up implementation of a
-   command line debugger, as well as a debugger API that can be used from other
-   applications.  LLDB makes use of the Clang parser to provide high-fidelity
-   expression parsing (particularly for C++) and uses the LLVM JIT for target
-   support.</p>
-
-<p>The 3.2 release has the following notable changes:</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="libc++">libc++: C++ Standard Library</a>
-</h3>
-
-<div>
-
-<p>Like compiler_rt, libc++ is now <a href="DeveloperPolicy.html#license">dual
-   licensed</a> under the MIT and UIUC license, allowing it to be used more
-   permissively.</p>
-
-<p>Within the LLVM 3.2 time-frame there were the following highlights:</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="vmkit">VMKit</a>
-</h3>
-
-<div>
-
-<p>The <a href="http://vmkit.llvm.org/">VMKit project</a> is an implementation
-  of a Java Virtual Machine (Java VM or JVM) that uses LLVM for static and
-  just-in-time compilation.</p>
-
-<p>The 3.2 release has the following notable changes:</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-
-<!--=========================================================================-->
-<h3>
-<a name="Polly">Polly: Polyhedral Optimizer</a>
-</h3>
-
-<div>
-
-<p><a href="http://polly.llvm.org/">Polly</a> is an <em>experimental</em>
-  optimizer for data locality and parallelism. It provides high-level
-  loop optimizations and automatic parallelisation.</p>
-
-<p>Within the LLVM 3.2 time-frame there were the following highlights:</p>
-
-<ul>
-  <li>isl, the integer set library used by Polly, was relicensed to the MIT
-license</li>
-  <li>isl based code generation<br />
-        <ul>
-<li>MIT licensed replacement for CLooG (LGPLv2) </li>
-<li>Fine grained option handling (separation of
-core and border computations, control overhead vs. code size) </li>
-</li>
-</ul>
-<li>Support for FORTRAN and dragonegg</li>
-<li>OpenMP code generation fixes</li>
-</ul>
-
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="externalproj">External Open Source Projects Using LLVM 3.2</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>An exciting aspect of LLVM is that it is used as an enabling technology for
-   a lot of other language and tools projects. This section lists some of the
-   projects that have already been updated to work with LLVM 3.2.</p>
-
-<h3>Crack</h3>
-
-<div>
-
-<p><a href="http://code.google.com/p/crack-language/">Crack</a> aims to provide
-   the ease of development of a scripting language with the performance of a
-   compiled language. The language derives concepts from C++, Java and Python,
-   incorporating object-oriented programming, operator overloading and strong
-   typing.</p>
-
-</div>
-
-<h3>FAUST</h3>
-
-<div>
-
-<p><a href="http://faust.grame.fr/">FAUST</a> is a compiled language for
-   real-time audio signal processing. The name FAUST stands for Functional
-   AUdio STream. Its programming model combines two approaches: functional
-   programming and block diagram composition. In addition with the C, C++, Java,
-   JavaScript output formats, the Faust compiler can generate LLVM bitcode, and
-   works with LLVM 2.7-3.1.</p>
-
-</div>
-
-<h3>Glasgow Haskell Compiler (GHC)</h3>
-
-<div>
-
-<p><a href="http://www.haskell.org/ghc/">GHC</a> is an open source compiler and
-   programming suite for Haskell, a lazy functional programming language. It
-   includes an optimizing static compiler generating good code for a variety of
-   platforms, together with an interactive system for convenient, quick
-   development.</p>
-
-<p>GHC 7.0 and onwards include an LLVM code generator, supporting LLVM 2.8 and
-   later.</p>
-
-</div>
-
-<h3>Julia</h3>
-
-<div>
-
-<p><a href="https://github.com/JuliaLang/julia">Julia</a> is a high-level,
-   high-performance dynamic language for technical computing. It provides a
-   sophisticated compiler, distributed parallel execution, numerical accuracy,
-   and an extensive mathematical function library. The compiler uses type
-   inference to generate fast code without any type declarations, and uses
-   LLVM's optimization passes and JIT compiler. The
-   <a href="http://julialang.org/"> Julia Language</a> is designed
-   around multiple dispatch, giving programs a large degree of flexibility. It
-   is ready for use on many kinds of problems.</p>
-
-</div>
-
-<h3>LLVM D Compiler</h3>
-
-<div>
-
-<p><a href="https://github.com/ldc-developers/ldc">LLVM D Compiler</a> (LDC) is
-   a compiler for the D programming Language. It is based on the DMD frontend
-   and uses LLVM as backend.</p>
-
-</div>
-
-<h3>Open Shading Language</h3>
-
-<div>
-
-<p><a href="https://github.com/imageworks/OpenShadingLanguage/">Open Shading
-   Language (OSL)</a> is a small but rich language for programmable shading in
-   advanced global illumination renderers and other applications, ideal for
-   describing materials, lights, displacement, and pattern generation. It uses
-   LLVM to JIT complex shader networks to x86 code at runtime.</p>
-
-<p>OSL was developed by Sony Pictures Imageworks for use in its in-house
-   renderer used for feature film animation and visual effects, and is
-   distributed as open source software with the "New BSD" license.</p>
-
-</div>
-
-<h3>Portable OpenCL (pocl)</h3>
-
-<div>
-
-<p>In addition to producing an easily portable open source OpenCL
-   implementation, another major goal of <a href="http://pocl.sourceforge.net/">
-   pocl</a> is improving performance portability of OpenCL programs with
-   compiler optimizations, reducing the need for target-dependent manual
-   optimizations. An important part of pocl is a set of LLVM passes used to
-   statically parallelize multiple work-items with the kernel compiler, even in
-   the presence of work-group barriers. This enables static parallelization of
-   the fine-grained static concurrency in the work groups in multiple ways
-   (SIMD, VLIW, superscalar,...).</p>
-
-</div>
-
-<h3>Pure</h3>
-
-<div>
-
-<p><a href="http://pure-lang.googlecode.com/">Pure</a> is an
-   algebraic/functional programming language based on term rewriting. Programs
-   are collections of equations which are used to evaluate expressions in a
-   symbolic fashion. The interpreter uses LLVM as a backend to JIT-compile Pure
-   programs to fast native code. Pure offers dynamic typing, eager and lazy
-   evaluation, lexical closures, a hygienic macro system (also based on term
-   rewriting), built-in list and matrix support (including list and matrix
-   comprehensions) and an easy-to-use interface to C and other programming
-   languages (including the ability to load LLVM bitcode modules, and inline C,
-   C++, Fortran and Faust code in Pure programs if the corresponding
-   LLVM-enabled compilers are installed).</p>
-
-<p>Pure version 0.54 has been tested and is known to work with LLVM 3.1 (and
-   continues to work with older LLVM releases >= 2.5).</p>
-
-</div>
-
-<h3>TTA-based Co-design Environment (TCE)</h3>
-
-<div>
-
-<p><a href="http://tce.cs.tut.fi/">TCE</a> is a toolset for designing
-   application-specific processors (ASP) based on the Transport triggered
-   architecture (TTA). The toolset provides a complete co-design flow from C/C++
-   programs down to synthesizable VHDL/Verilog and parallel program binaries.
-   Processor customization points include the register files, function units,
-   supported operations, and the interconnection network.</p>
-
-<p>TCE uses Clang and LLVM for C/C++ language support, target independent
-   optimizations and also for parts of code generation. It generates new
-   LLVM-based code generators "on the fly" for the designed TTA processors and
-   loads them in to the compiler backend as runtime libraries to avoid
-   per-target recompilation of larger parts of the compiler chain.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="whatsnew">What's New in LLVM 3.2?</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This release includes a huge number of bug fixes, performance tweaks and
-   minor improvements. Some of the major improvements and new features are
-   listed in this section.</p>
-
-<!--=========================================================================-->
-<h3>
-<a name="majorfeatures">Major New Features</a>
-</h3>
-
-<div>
-
-  <!-- Features that need text if they're finished for 3.2:
-   ARM EHABI
-   combiner-aa?
-   strong phi elim
-   loop dependence analysis
-   CorrelatedValuePropagation
-   lib/Transforms/IPO/MergeFunctions.cpp => consider for 3.2.
-   Integrated assembler on by default for arm/thumb?
-
-   -->
-
-  <!-- Near dead:
-   Analysis/RegionInfo.h + Dom Frontiers
-   SparseBitVector: used in LiveVar.
-   llvm/lib/Archive - replace with lib object?
-   -->
-
-<p>LLVM 3.2 includes several major changes and big features:</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-
-<!--=========================================================================-->
-<h3>
-<a name="coreimprovements">LLVM IR and Core Improvements</a>
-</h3>
-
-<div>
-
-<p>LLVM IR has several new features for better support of new targets and that
-   expose new optimization opportunities:</p>
-
-<ul>
-  <li>Thread local variables may have a specified TLS model. See the
-  <a href="LangRef.html#globalvars">Language Reference Manual</a>.</li>
-  <li>...</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="optimizer">Optimizer Improvements</a>
-</h3>
-
-<div>
-
-<p>In addition to many minor performance tweaks and bug fixes, this release
-   includes a few major enhancements and additions to the optimizers:</p>
-
-<p> Loop Vectorizer - We've added a loop vectorizer and we are now able to
-    vectorize small loops. The loop vectorizer is disabled by default and
-    can be enabled using the <b>-mllvm -vectorize-loops</b> flag.
-    The SIMD vector width can be specified using the flag
-    <b>-mllvm -force-vector-width=4</b>.
-    The default value is <b>0</b> which means auto-select.
-    <br/>
-    We can now vectorize this function:
-
-    <pre class="doc_code">
-    unsigned sum_arrays(int *A, int *B, int start, int end) {
-      unsigned sum = 0;
-      for (int i = start; i &lt; end; ++i)
-        sum += A[i] + B[i] + i;
-
-      return sum;
-    }
-    </pre>
-
-    We vectorize under the following loops:
-    <ul>
-    <li>The inner most loops must have a single basic block.</li>
-    <li>The number of iterations are known before the loop starts to execute.</li>
-    <li>The loop counter needs to be incremented by one.</li>
-    <li>The loop trip count <b>can</b> be a variable.</li>
-    <li>Loops do <b>not</b> need to start at zero.</li>
-    <li>The induction variable can be used inside the loop.</li>
-    <li>Loop reductions are supported.</li>
-    <li>Arrays with affine access pattern do <b>not</b> need to be marked as 'noalias' and are checked at runtime.</li>
-    <li>...</li>
-    </ul>
-
-</p>
-
-<p>SROA - We've re-written SROA to be significantly more powerful.
-<!-- FIXME: Add more text here... --></p>
-
-<ul>
-  <li>Branch weight metadata is preseved through more of the optimizer.</li>
-  <li>...</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="mc">MC Level Improvements</a>
-</h3>
-
-<div>
-
-<p>The LLVM Machine Code (aka MC) subsystem was created to solve a number of
-   problems in the realm of assembly, disassembly, object file format handling,
-   and a number of other related areas that CPU instruction-set level tools work
-   in. For more information, please see the
-   <a href="http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html">Intro
-   to the LLVM MC Project Blog Post</a>.</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="codegen">Target Independent Code Generator Improvements</a>
-</h3>
-
-<div>
-
-<p>Stack Coloring - We have implemented a new optimization pass
-  to merge stack objects which are used in disjoin areas of the code.
-  This optimization reduces the required stack space significantly, in cases
-  where it is clear to the optimizer that the stack slot is not shared.
-  We use the lifetime markers to tell the codegen that a certain alloca
-  is used within a region.</p>
-
-<p> We now merge consecutive loads and stores. </p>
-
-<p>We have put a significant amount of work into the code generator
-   infrastructure, which allows us to implement more aggressive algorithms and
-   make it run faster:</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-<p> We added new TableGen infrastructure to support bundling for
-    Very Long Instruction Word (VLIW) architectures. TableGen can now
-    automatically generate a deterministic finite automaton from a VLIW
-    target's schedule description which can be queried to determine
-    legal groupings of instructions in a bundle.</p>
-
-<p> We have added a new target independent VLIW packetizer based on the
-    DFA infrastructure to group machine instructions into bundles.</p>
-
-</div>
-
-<h4>
-<a name="blockplacement">Basic Block Placement</a>
-</h4>
-
-<div>
-
-<p>A probability based block placement and code layout algorithm was added to
-   LLVM's code generator. This layout pass supports probabilities derived from
-   static heuristics as well as source code annotations such as
-   <code>__builtin_expect</code>.</p>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="x86">X86-32 and X86-64 Target Improvements</a>
-</h3>
-
-<div>
-
-<p>New features and major changes in the X86 target include:</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="ARM">ARM Target Improvements</a>
-</h3>
-
-<div>
-
-<p>New features of the ARM target include:</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-<!--_________________________________________________________________________-->
-
-<h4>
-<a name="armintegratedassembler">ARM Integrated Assembler</a>
-</h4>
-
-<div>
-
-<p>The ARM target now includes a full featured macro assembler, including
-   direct-to-object module support for clang. The assembler is currently enabled
-   by default for Darwin only pending testing and any additional necessary
-   platform specific support for Linux.</p>
-
-<p>Full support is included for Thumb1, Thumb2 and ARM modes, along with
-   subtarget and CPU specific extensions for VFP2, VFP3 and NEON.</p>
-
-<p>The assembler is Unified Syntax only (see ARM Architecural Reference Manual
-   for details). While there is some, and growing, support for pre-unfied
-   (divided) syntax, there are still significant gaps in that support.</p>
-
-</div>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="MIPS">MIPS Target Improvements</a>
-</h3>
-
-<div>
-
-<p>New features and major changes in the MIPS target include:</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="PowerPC">PowerPC Target Improvements</a>
-</h3>
-
-<div>
-
-<ul>
-<p>Many fixes and changes across LLVM (and Clang) for better compliance with
-   the 64-bit PowerPC ELF Application Binary Interface, interoperability with
-   GCC, and overall 64-bit PowerPC support.   Some highlights include:</p>
-<ul>
-  <li>  MCJIT support added.</li>
-  <li>  PPC64 relocation support and (small code model) TOC handling
-        added.</li>
-  <li>  Parameter passing and return value fixes (alignment issues,
-        padding, varargs support, proper register usage, odd-sized
-        structure support, float support, extension of return values
-        for i32 return values).</li>
-  <li>  Fixes in spill and reload code for vector registers.</li>
-  <li>  C++ exception handling enabled.</li>
-  <li>  Changes to remediate double-rounding compatibility issues with
-        respect to GCC behavior.</li>
-  <li>  Refactoring to disentangle ppc64-elf-linux ABI from Darwin
-        ppc64 ABI support.</li>
-  <li>  Assorted new test cases and test case fixes (endian and word
-        size issues).</li>
-  <li>  Fixes for big-endian codegen bugs, instruction encodings, and
-        instruction constraints.</li>
-  <li>  Implemented -integrated-as support.</li>
-  <li>  Additional support for Altivec compare operations.</li>
-  <li>  IBM long double support.</li>
-</ul>
-<p>There have also been code generation improvements for both 32- and 64-bit
-   code. Instruction scheduling support for the Freescale e500mc and e5500
-   cores has been added.</p>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="OtherTS">Other Target Specific Improvements</a>
-</h3>
-
-<div>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="changes">Major Changes and Removed Features</a>
-</h3>
-
-<div>
-
-<p>If you're already an LLVM user or developer with out-of-tree changes based on
-   LLVM 3.2, this section lists some "gotchas" that you may run into upgrading
-   from the previous release.</p>
-
-<ul>
-  <li>The CellSPU port has been removed. It can still be found in older
-      versions.</li>
-  <li>...</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="api_changes">Internal API Changes</a>
-</h3>
-
-<div>
-
-<p>In addition, many APIs have changed in this release.  Some of the major
-   LLVM API changes are:</p>
-
-<p> We've added a new interface for allowing IR-level passes to access
-  target-specific information. A new IR-level pass, called
-  "TargetTransformInfo" provides a number of low-level interfaces.
-  LSR and LowerInvoke already use the new interface. </p>
-
-<p> The TargetData structure has been renamed to DataLayout and moved to VMCore
-to remove a dependency on Target. </p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-<!--=========================================================================-->
-<h3>
-<a name="tools_changes">Tools Changes</a>
-</h3>
-
-<div>
-
-<p>In addition, some tools have changed in this release. Some of the changes
-   are:</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-
-<!--=========================================================================-->
-<h3>
-<a name="python">Python Bindings</a>
-</h3>
-
-<div>
-
-<p>Officially supported Python bindings have been added! Feature support is far
-   from complete. The current bindings support interfaces to:</p>
-
-<ul>
-  <li>...</li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="knownproblems">Known Problems</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM is generally a production quality compiler, and is used by a broad range
-   of applications and shipping in many products.  That said, not every
-   subsystem is as mature as the aggregate, particularly the more obscure
-   targets.  If you run into a problem, please check
-   the <a href="http://llvm.org/bugs/">LLVM bug database</a> and submit a bug if
-   there isn't already one or ask on
-   the <a href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev
-   list</a>.</p>
-
-  <p>Known problem areas include:</p>
-
-<ul>
-  <li>The CellSPU, MSP430, PTX and XCore backends are experimental.</li>
-
-  <li>The integrated assembler, disassembler, and JIT is not supported by
-      several targets. If an integrated assembler is not supported, then a
-      system assembler is required.  For more details, see the <a
-      href="CodeGenerator.html#targetfeatures">Target Features Matrix</a>.
-  </li>
-</ul>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="additionalinfo">Additional Information</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>A wide variety of additional information is available on
-   the <a href="http://llvm.org/">LLVM web page</a>, in particular in
-   the <a href="http://llvm.org/docs/">documentation</a> section.  The web page
-   also contains versions of the API documentation which is up-to-date with the
-   Subversion version of the source code.  You can access versions of these
-   documents specific to this release by going into the "<tt>llvm/doc/</tt>"
-   directory in the LLVM tree.</p>
-
-<p>If you have any questions or comments about LLVM, please feel free to contact
-   us via the <a href="http://llvm.org/docs/#maillist"> mailing lists</a>.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
-  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-
-</body>
-</html>
diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst
new file mode 100644
index 0000000000..a5922ad983
--- /dev/null
+++ b/docs/ReleaseNotes.rst
@@ -0,0 +1,564 @@
+.. raw:: html
+
+    <style> .red {color:red} </style>
+
+.. role:: red
+
+======================
+LLVM 3.2 Release Notes
+======================
+
+.. contents::
+    :local:
+
+Written by the `LLVM Team <http://llvm.org/>`_
+
+:red:`These are in-progress notes for the upcoming LLVM 3.2 release.  You may
+prefer the` `LLVM 3.1 Release Notes <http://llvm.org/releases/3.1/docs
+/ReleaseNotes.html>`_.
+
+Introduction
+============
+
+This document contains the release notes for the LLVM Compiler Infrastructure,
+release 3.2.  Here we describe the status of LLVM, including major improvements
+from the previous release, improvements in various subprojects of LLVM, and
+some of the current users of the code.  All LLVM releases may be downloaded
+from the `LLVM releases web site <http://llvm.org/releases/>`_.
+
+For more information about LLVM, including information about the latest
+release, please check out the `main LLVM web site <http://llvm.org/>`_.  If you
+have questions or comments, the `LLVM Developer's Mailing List
+<http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ is a good place to send
+them.
+
+Note that if you are reading this file from a Subversion checkout or the main
+LLVM web page, this document applies to the *next* release, not the current
+one.  To see the release notes for a specific release, please see the `releases
+page <http://llvm.org/releases/>`_.
+
+Sub-project Status Update
+=========================
+
+The LLVM 3.2 distribution currently consists of code from the core LLVM
+repository, which roughly includes the LLVM optimizers, code generators and
+supporting tools, and the Clang repository.  In addition to this code, the LLVM
+Project includes other sub-projects that are in development.  Here we include
+updates on these subprojects.
+
+Clang: C/C++/Objective-C Frontend Toolkit
+-----------------------------------------
+
+`Clang <http://clang.llvm.org/>`_ is an LLVM front end for the C, C++, and
+Objective-C languages.  Clang aims to provide a better user experience through
+expressive diagnostics, a high level of conformance to language standards, fast
+compilation, and low memory use.  Like LLVM, Clang provides a modular,
+library-based architecture that makes it suitable for creating or integrating
+with other development tools.  Clang is considered a production-quality
+compiler for C, Objective-C, C++ and Objective-C++ on x86 (32- and 64-bit), and
+for Darwin/ARM targets.
+
+In the LLVM 3.2 time-frame, the Clang team has made many improvements.
+Highlights include:
+
+#. ...
+
+For more details about the changes to Clang since the 3.1 release, see the
+`Clang release notes. <http://clang.llvm.org/docs/ReleaseNotes.html>`_
+
+If Clang rejects your code but another compiler accepts it, please take a look
+at the `language compatibility <http://clang.llvm.org/compatibility.html>`_
+guide to make sure this is not intentional or a known issue.
+
+DragonEgg: GCC front-ends, LLVM back-end
+----------------------------------------
+
+`DragonEgg <http://dragonegg.llvm.org/>`_ is a `gcc plugin
+<http://gcc.gnu.org/wiki/plugins>`_ that replaces GCC's optimizers and code
+generators with LLVM's.  It works with gcc-4.5 and gcc-4.6 (and partially with
+gcc-4.7), can target the x86-32/x86-64 and ARM processor families, and has been
+successfully used on the Darwin, FreeBSD, KFreeBSD, Linux and OpenBSD
+platforms.  It fully supports Ada, C, C++ and Fortran.  It has partial support
+for Go, Java, Obj-C and Obj-C++.
+
+The 3.2 release has the following notable changes:
+
+#. ...
+
+compiler-rt: Compiler Runtime Library
+-------------------------------------
+
+The new LLVM `compiler-rt project <http://compiler-rt.llvm.org/>`_ is a simple
+library that provides an implementation of the low-level target-specific hooks
+required by code generation and other runtime components.  For example, when
+compiling for a 32-bit target, converting a double to a 64-bit unsigned integer
+is compiled into a runtime call to the ``__fixunsdfdi`` function.  The
+``compiler-rt`` library provides highly optimized implementations of this and
+other low-level routines (some are 3x faster than the equivalent libgcc
+routines).
+
+The 3.2 release has the following notable changes:
+
+#. ...
+
+LLDB: Low Level Debugger
+------------------------
+
+`LLDB <http://lldb.llvm.org>`_ is a ground-up implementation of a command line
+debugger, as well as a debugger API that can be used from other applications.
+LLDB makes use of the Clang parser to provide high-fidelity expression parsing
+(particularly for C++) and uses the LLVM JIT for target support.
+
+The 3.2 release has the following notable changes:
+
+#. ...
+
+libc++: C++ Standard Library
+----------------------------
+
+Like compiler_rt, libc++ is now :ref:`dual licensed
+<copyright-license-patents>` under the MIT and UIUC license, allowing it to be
+used more permissively.
+
+Within the LLVM 3.2 time-frame there were the following highlights:
+
+#. ...
+
+VMKit
+-----
+
+The `VMKit project <http://vmkit.llvm.org/>`_ is an implementation of a Java
+Virtual Machine (Java VM or JVM) that uses LLVM for static and just-in-time
+compilation.
+
+The 3.2 release has the following notable changes:
+
+#. ...
+
+Polly: Polyhedral Optimizer
+---------------------------
+
+`Polly <http://polly.llvm.org/>`_ is an *experimental* optimizer for data
+locality and parallelism.  It provides high-level loop optimizations and
+automatic parallelisation.
+
+Within the LLVM 3.2 time-frame there were the following highlights:
+
+#. isl, the integer set library used by Polly, was relicensed to the MIT license
+#. isl based code generation
+#. MIT licensed replacement for CLooG (LGPLv2)
+#. Fine grained option handling (separation of core and border computations,
+   control overhead vs. code size)
+#. Support for FORTRAN and dragonegg
+#. OpenMP code generation fixes
+
+External Open Source Projects Using LLVM 3.2
+============================================
+
+An exciting aspect of LLVM is that it is used as an enabling technology for a
+lot of other language and tools projects.  This section lists some of the
+projects that have already been updated to work with LLVM 3.2.
+
+Crack
+-----
+
+`Crack <http://code.google.com/p/crack-language/>`_ aims to provide the ease of
+development of a scripting language with the performance of a compiled
+language.  The language derives concepts from C++, Java and Python,
+incorporating object-oriented programming, operator overloading and strong
+typing.
+
+FAUST
+-----
+
+`FAUST <http://faust.grame.fr/>`_ is a compiled language for real-time audio
+signal processing.  The name FAUST stands for Functional AUdio STream.  Its
+programming model combines two approaches: functional programming and block
+diagram composition.  In addition with the C, C++, Java, JavaScript output
+formats, the Faust compiler can generate LLVM bitcode, and works with LLVM
+2.7-3.1.
+
+Glasgow Haskell Compiler (GHC)
+------------------------------
+
+`GHC <http://www.haskell.org/ghc/>`_ is an open source compiler and programming
+suite for Haskell, a lazy functional programming language.  It includes an
+optimizing static compiler generating good code for a variety of platforms,
+together with an interactive system for convenient, quick development.
+
+GHC 7.0 and onwards include an LLVM code generator, supporting LLVM 2.8 and
+later.
+
+Julia
+-----
+
+`Julia <https://github.com/JuliaLang/julia>`_ is a high-level, high-performance
+dynamic language for technical computing.  It provides a sophisticated
+compiler, distributed parallel execution, numerical accuracy, and an extensive
+mathematical function library.  The compiler uses type inference to generate
+fast code without any type declarations, and uses LLVM's optimization passes
+and JIT compiler.  The `Julia Language <http://julialang.org/>`_ is designed
+around multiple dispatch, giving programs a large degree of flexibility.  It is
+ready for use on many kinds of problems.
+
+LLVM D Compiler
+---------------
+
+`LLVM D Compiler <https://github.com/ldc-developers/ldc>`_ (LDC) is a compiler
+for the D programming Language.  It is based on the DMD frontend and uses LLVM
+as backend.
+
+Open Shading Language
+---------------------
+
+`Open Shading Language (OSL)
+<https://github.com/imageworks/OpenShadingLanguage/>`_ is a small but rich
+language for programmable shading in advanced global illumination renderers and
+other applications, ideal for describing materials, lights, displacement, and
+pattern generation.  It uses LLVM to JIT complex shader networks to x86 code at
+runtime.
+
+OSL was developed by Sony Pictures Imageworks for use in its in-house renderer
+used for feature film animation and visual effects, and is distributed as open
+source software with the "New BSD" license.
+
+Portable OpenCL (pocl)
+----------------------
+
+In addition to producing an easily portable open source OpenCL implementation,
+another major goal of `pocl <http://pocl.sourceforge.net/>`_ is improving
+performance portability of OpenCL programs with compiler optimizations,
+reducing the need for target-dependent manual optimizations.  An important part
+of pocl is a set of LLVM passes used to statically parallelize multiple
+work-items with the kernel compiler, even in the presence of work-group
+barriers.  This enables static parallelization of the fine-grained static
+concurrency in the work groups in multiple ways (SIMD, VLIW, superscalar, ...).
+
+Pure
+----
+
+`Pure <http://pure-lang.googlecode.com/>`_ is an algebraic/functional
+programming language based on term rewriting.  Programs are collections of
+equations which are used to evaluate expressions in a symbolic fashion.  The
+interpreter uses LLVM as a backend to JIT-compile Pure programs to fast native
+code.  Pure offers dynamic typing, eager and lazy evaluation, lexical closures,
+a hygienic macro system (also based on term rewriting), built-in list and
+matrix support (including list and matrix comprehensions) and an easy-to-use
+interface to C and other programming languages (including the ability to load
+LLVM bitcode modules, and inline C, C++, Fortran and Faust code in Pure
+programs if the corresponding LLVM-enabled compilers are installed).
+
+Pure version 0.54 has been tested and is known to work with LLVM 3.1 (and
+continues to work with older LLVM releases >= 2.5).
+
+TTA-based Co-design Environment (TCE)
+-------------------------------------
+
+`TCE <http://tce.cs.tut.fi/>`_ is a toolset for designing application-specific
+processors (ASP) based on the Transport triggered architecture (TTA).  The
+toolset provides a complete co-design flow from C/C++ programs down to
+synthesizable VHDL/Verilog and parallel program binaries.  Processor
+customization points include the register files, function units, supported
+operations, and the interconnection network.
+
+TCE uses Clang and LLVM for C/C++ language support, target independent
+optimizations and also for parts of code generation.  It generates new
+LLVM-based code generators "on the fly" for the designed TTA processors and
+loads them in to the compiler backend as runtime libraries to avoid per-target
+recompilation of larger parts of the compiler chain.
+
+Installation Instructions
+=========================
+
+See :doc:`GettingStarted`.
+
+What's New in LLVM 3.2?
+=======================
+
+This release includes a huge number of bug fixes, performance tweaks and minor
+improvements.  Some of the major improvements and new features are listed in
+this section.
+
+Major New Features
+------------------
+
+..
+
+  Features that need text if they're finished for 3.2:
+   ARM EHABI
+   combiner-aa?
+   strong phi elim
+   loop dependence analysis
+   CorrelatedValuePropagation
+   lib/Transforms/IPO/MergeFunctions.cpp => consider for 3.2.
+   Integrated assembler on by default for arm/thumb?
+
+  Near dead:
+   Analysis/RegionInfo.h + Dom Frontiers
+   SparseBitVector: used in LiveVar.
+   llvm/lib/Archive - replace with lib object?
+
+
+LLVM 3.2 includes several major changes and big features:
+
+#. New NVPTX back-end (replacing existing PTX back-end) based on NVIDIA sources
+#. ...
+
+LLVM IR and Core Improvements
+-----------------------------
+
+LLVM IR has several new features for better support of new targets and that
+expose new optimization opportunities:
+
+#. Thread local variables may have a specified TLS model.  See the :ref:`Language
+   Reference Manual <globalvars>`.
+#. ...
+
+Optimizer Improvements
+----------------------
+
+In addition to many minor performance tweaks and bug fixes, this release
+includes a few major enhancements and additions to the optimizers:
+
+Loop Vectorizer - We've added a loop vectorizer and we are now able to
+vectorize small loops.  The loop vectorizer is disabled by default and can be
+enabled using the ``-mllvm -vectorize-loops`` flag.  The SIMD vector width can
+be specified using the flag ``-mllvm -force-vector-width=4``.  The default
+value is ``0`` which means auto-select.
+
+We can now vectorize this function:
+
+.. code-block:: c++
+
+  unsigned sum_arrays(int *A, int *B, int start, int end) {
+    unsigned sum = 0;
+    for (int i = start; i < end; ++i)
+      sum += A[i] + B[i] + i;
+    return sum;
+  }
+
+We vectorize under the following loops:
+
+#. The inner most loops must have a single basic block.
+#. The number of iterations are known before the loop starts to execute.
+#. The loop counter needs to be incremented by one.
+#. The loop trip count **can** be a variable.
+#. Loops do **not** need to start at zero.
+#. The induction variable can be used inside the loop.
+#. Loop reductions are supported.
+#. Arrays with affine access pattern do **not** need to be marked as
+   '``noalias``' and are checked at runtime.
+#. ...
+
+SROA - We've re-written SROA to be significantly more powerful.
+
+#. Branch weight metadata is preseved through more of the optimizer.
+#. ...
+
+MC Level Improvements
+---------------------
+
+The LLVM Machine Code (aka MC) subsystem was created to solve a number of
+problems in the realm of assembly, disassembly, object file format handling,
+and a number of other related areas that CPU instruction-set level tools work
+in.  For more information, please see the `Intro to the LLVM MC Project Blog
+Post <http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html>`_.
+
+#. ...
+
+.. _codegen:
+
+Target Independent Code Generator Improvements
+----------------------------------------------
+
+Stack Coloring - We have implemented a new optimization pass to merge stack
+objects which are used in disjoin areas of the code.  This optimization reduces
+the required stack space significantly, in cases where it is clear to the
+optimizer that the stack slot is not shared.  We use the lifetime markers to
+tell the codegen that a certain alloca is used within a region.
+
+We now merge consecutive loads and stores.
+
+We have put a significant amount of work into the code generator
+infrastructure, which allows us to implement more aggressive algorithms and
+make it run faster:
+
+#. ...
+
+We added new TableGen infrastructure to support bundling for Very Long
+Instruction Word (VLIW) architectures.  TableGen can now automatically generate
+a deterministic finite automaton from a VLIW target's schedule description
+which can be queried to determine legal groupings of instructions in a bundle.
+
+We have added a new target independent VLIW packetizer based on the DFA
+infrastructure to group machine instructions into bundles.
+
+Basic Block Placement
+^^^^^^^^^^^^^^^^^^^^^
+
+A probability based block placement and code layout algorithm was added to
+LLVM's code generator.  This layout pass supports probabilities derived from
+static heuristics as well as source code annotations such as
+``__builtin_expect``.
+
+X86-32 and X86-64 Target Improvements
+-------------------------------------
+
+New features and major changes in the X86 target include:
+
+#. ...
+
+.. _ARM:
+
+ARM Target Improvements
+-----------------------
+
+New features of the ARM target include:
+
+#. ...
+
+.. _armintegratedassembler:
+
+ARM Integrated Assembler
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ARM target now includes a full featured macro assembler, including
+direct-to-object module support for clang.  The assembler is currently enabled
+by default for Darwin only pending testing and any additional necessary
+platform specific support for Linux.
+
+Full support is included for Thumb1, Thumb2 and ARM modes, along with subtarget
+and CPU specific extensions for VFP2, VFP3 and NEON.
+
+The assembler is Unified Syntax only (see ARM Architecural Reference Manual for
+details).  While there is some, and growing, support for pre-unfied (divided)
+syntax, there are still significant gaps in that support.
+
+MIPS Target Improvements
+------------------------
+
+New features and major changes in the MIPS target include:
+
+#. ...
+
+PowerPC Target Improvements
+---------------------------
+
+Many fixes and changes across LLVM (and Clang) for better compliance with the
+64-bit PowerPC ELF Application Binary Interface, interoperability with GCC, and
+overall 64-bit PowerPC support.  Some highlights include:
+
+#. MCJIT support added.
+#. PPC64 relocation support and (small code model) TOC handling added.
+#. Parameter passing and return value fixes (alignment issues, padding, varargs
+   support, proper register usage, odd-sized structure support, float support,
+   extension of return values for i32 return values).
+#. Fixes in spill and reload code for vector registers.
+#. C++ exception handling enabled.
+#. Changes to remediate double-rounding compatibility issues with respect to
+   GCC behavior.
+#. Refactoring to disentangle ``ppc64-elf-linux`` ABI from Darwin ppc64 ABI
+   support.
+#. Assorted new test cases and test case fixes (endian and word size issues).
+#. Fixes for big-endian codegen bugs, instruction encodings, and instruction
+   constraints.
+#. Implemented ``-integrated-as`` support.
+#. Additional support for Altivec compare operations.
+#. IBM long double support.
+
+There have also been code generation improvements for both 32- and 64-bit code.
+Instruction scheduling support for the Freescale e500mc and e5500 cores has
+been added.
+
+PTX/NVPTX Target Improvements
+-----------------------------
+
+The PTX back-end has been replaced by the NVPTX back-end, which is based on the
+LLVM back-end used by NVIDIA in their CUDA (nvcc) and OpenCL compiler.  Some
+highlights include:
+
+#. Compatibility with PTX 3.1 and SM 3.5.
+#. Support for NVVM intrinsics as defined in the NVIDIA Compiler SDK.
+#. Full compatibility with old PTX back-end, with much greater coverage of LLVM
+   SIR.
+
+Please submit any back-end bugs to the LLVM Bugzilla site.
+
+Other Target Specific Improvements
+----------------------------------
+
+#. ...
+
+Major Changes and Removed Features
+----------------------------------
+
+If you're already an LLVM user or developer with out-of-tree changes based on
+LLVM 3.2, this section lists some "gotchas" that you may run into upgrading
+from the previous release.
+
+#. The CellSPU port has been removed.  It can still be found in older versions.
+#. ...
+
+Internal API Changes
+--------------------
+
+In addition, many APIs have changed in this release.  Some of the major LLVM
+API changes are:
+
+We've added a new interface for allowing IR-level passes to access
+target-specific information.  A new IR-level pass, called
+``TargetTransformInfo`` provides a number of low-level interfaces.  LSR and
+LowerInvoke already use the new interface.
+
+The ``TargetData`` structure has been renamed to ``DataLayout`` and moved to
+``VMCore`` to remove a dependency on ``Target``.
+
+#. ...
+
+Tools Changes
+-------------
+
+In addition, some tools have changed in this release.  Some of the changes are:
+
+#. ...
+
+Python Bindings
+---------------
+
+Officially supported Python bindings have been added!  Feature support is far
+from complete.  The current bindings support interfaces to:
+
+#. ...
+
+Known Problems
+==============
+
+LLVM is generally a production quality compiler, and is used by a broad range
+of applications and shipping in many products.  That said, not every subsystem
+is as mature as the aggregate, particularly the more obscure1 targets.  If you
+run into a problem, please check the `LLVM bug database
+<http://llvm.org/bugs/>`_ and submit a bug if there isn't already one or ask on
+the `LLVMdev list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_.
+
+Known problem areas include:
+
+#. The CellSPU, MSP430, and XCore backends are experimental.
+
+#. The integrated assembler, disassembler, and JIT is not supported by several
+   targets.  If an integrated assembler is not supported, then a system
+   assembler is required.  For more details, see the
+   :ref:`target-feature-matrix`.
+
+Additional Information
+======================
+
+A wide variety of additional information is available on the `LLVM web page
+<http://llvm.org/>`_, in particular in the `documentation
+<http://llvm.org/docs/>`_ section.  The web page also contains versions of the
+API documentation which is up-to-date with the Subversion version of the source
+code.  You can access versions of these documents specific to this release by
+going into the ``llvm/docs/`` directory in the LLVM tree.
+
+If you have any questions or comments about LLVM, please feel free to contact
+us via the `mailing lists <http://llvm.org/docs/#maillist>`_.
+
diff --git a/docs/SourceLevelDebugging.rst b/docs/SourceLevelDebugging.rst
index 2bbf2e2c35..d7c50d234a 100644
--- a/docs/SourceLevelDebugging.rst
+++ b/docs/SourceLevelDebugging.rst
@@ -231,9 +231,8 @@ produced it.
 
 Compile unit descriptors provide the root context for objects declared in a
 specific compilation unit.  File descriptors are defined using this context.
-These descriptors are collected by a named metadata ``!llvm.dbg.cu``.  Compile
-unit descriptor keeps track of subprograms, global variables and type
-information.
+These descriptors are collected by a named metadata ``!llvm.dbg.cu``.  They
+keep track of subprograms, global variables and type information.
 
 .. _format_files:
 
@@ -278,7 +277,7 @@ Global variable descriptors
     {}*       ;; Reference to the global variable
   }
 
-These descriptors provide debug information about globals variables.  The
+These descriptors provide debug information about globals variables.  They
 provide details such as name, type and where the variable is defined.  All
 global variables are collected inside the named metadata ``!llvm.dbg.cu``.
 
diff --git a/docs/SphinxQuickstartTemplate.rst b/docs/SphinxQuickstartTemplate.rst
index 75d916368e..640df63db1 100644
--- a/docs/SphinxQuickstartTemplate.rst
+++ b/docs/SphinxQuickstartTemplate.rst
@@ -116,6 +116,26 @@ For a shell session, use a ``bash`` code block:
 
 If you need to show LLVM IR use the ``llvm`` code block.
 
+You can show preformatted text without any syntax highlighting like this:
+
+::
+
+                          .
+                           +:.
+                       ..:: ::
+                    .++:+:: ::+:.:.
+                   .:+           :
+            ::.::..::            .+.
+          ..:+    ::              :
+    ......+:.                    ..
+          :++.    ..              :
+            .+:::+::              :
+            ..   . .+            ::
+                     +.:      .::+.
+                      ...+. .: .
+                         .++:..
+                          ...
+
 Hopefully you won't need to be this deep
 """"""""""""""""""""""""""""""""""""""""
 
diff --git a/docs/SystemLibrary.rst b/docs/SystemLibrary.rst
index b54026717c..88404f4d81 100644
--- a/docs/SystemLibrary.rst
+++ b/docs/SystemLibrary.rst
@@ -7,7 +7,6 @@ System Library
 Abstract
 ========
 
-
 This document provides some details on LLVM's System Library, located in the
 source at ``lib/System`` and ``include/llvm/System``. The library's purpose is
 to shield LLVM from the differences between operating systems for the few
diff --git a/docs/TestingGuide.rst b/docs/TestingGuide.rst
index 329003f089..f66cae1d14 100644
--- a/docs/TestingGuide.rst
+++ b/docs/TestingGuide.rst
@@ -46,23 +46,15 @@ Regression tests
 ----------------
 
 The regression tests are small pieces of code that test a specific
-feature of LLVM or trigger a specific bug in LLVM. They are usually
-written in LLVM assembly language, but can be written in other languages
-if the test targets a particular language front end (and the appropriate
-``--with-llvmgcc`` options were used at ``configure`` time of the
-``llvm`` module). These tests are driven by the 'lit' testing tool,
-which is part of LLVM.
-
-These code fragments are not complete programs. The code generated from
-them is never executed to determine correct behavior.
-
-These code fragment tests are located in the ``llvm/test`` directory.
+feature of LLVM or trigger a specific bug in LLVM. The language they are
+written in depends on the part of LLVM being tested. These tests are driven by
+the :doc:`Lit <CommandGuide/lit>` testing tool (which is part of LLVM), and
+are located in the ``llvm/test`` directory.
 
 Typically when a bug is found in LLVM, a regression test containing just
 enough code to reproduce the problem should be written and placed
-somewhere underneath this directory. In most cases, this will be a small
-piece of LLVM assembly language code, often distilled from an actual
-application or benchmark.
+somewhere underneath this directory. For example, it can be a small
+piece of LLVM IR distilled from an actual application or benchmark.
 
 ``test-suite``
 --------------
@@ -100,8 +92,8 @@ Quick start
 
 The tests are located in two separate Subversion modules. The
 regressions tests are in the main "llvm" module under the directory
-``llvm/test`` (so you get these tests for free with the main llvm tree).
-Use "make check-all" to run the regression tests after building LLVM.
+``llvm/test`` (so you get these tests for free with the main LLVM tree).
+Use ``make check-all`` to run the regression tests after building LLVM.
 
 The more comprehensive test suite that includes whole programs in C and C++
 is in the ``test-suite`` module. See :ref:`test-suite Quickstart
@@ -110,38 +102,37 @@ is in the ``test-suite`` module. See :ref:`test-suite Quickstart
 Regression tests
 ----------------
 
-To run all of the LLVM regression tests, use master Makefile in the
-``llvm/test`` directory:
+To run all of the LLVM regression tests, use the master Makefile in the
+``llvm/test`` directory. LLVM Makefiles require GNU Make (read the :doc:`LLVM
+Makefile Guide <MakefileGuide>` for more details):
 
 .. code-block:: bash
 
-    % gmake -C llvm/test
+    % make -C llvm/test
 
-or
+or:
 
 .. code-block:: bash
 
-    % gmake check
+    % make check
 
 If you have `Clang <http://clang.llvm.org/>`_ checked out and built, you
 can run the LLVM and Clang tests simultaneously using:
 
-or
-
 .. code-block:: bash
 
-    % gmake check-all
+    % make check-all
 
 To run the tests with Valgrind (Memcheck by default), just append
 ``VG=1`` to the commands above, e.g.:
 
 .. code-block:: bash
 
-    % gmake check VG=1
+    % make check VG=1
 
-To run individual tests or subsets of tests, you can use the 'llvm-lit'
+To run individual tests or subsets of tests, you can use the ``llvm-lit``
 script which is built as part of LLVM. For example, to run the
-'Integer/BitPacked.ll' test by itself you can run:
+``Integer/BitPacked.ll`` test by itself you can run:
 
 .. code-block:: bash
 
@@ -153,8 +144,8 @@ or to run all of the ARM CodeGen tests:
 
     % llvm-lit ~/llvm/test/CodeGen/ARM
 
-For more information on using the 'lit' tool, see 'llvm-lit --help' or
-the 'lit' man page.
+For more information on using the :program:`lit` tool, see ``llvm-lit --help``
+or the :doc:`lit man page <CommandGuide/lit>`.
 
 Debugging Information tests
 ---------------------------
@@ -172,36 +163,25 @@ These tests are already set up to run as part of clang regression tests.
 Regression test structure
 =========================
 
-The LLVM regression tests are driven by 'lit' and are located in the
+The LLVM regression tests are driven by :program:`lit` and are located in the
 ``llvm/test`` directory.
 
 This directory contains a large array of small tests that exercise
 various features of LLVM and to ensure that regressions do not occur.
 The directory is broken into several sub-directories, each focused on a
-particular area of LLVM. A few of the important ones are:
-
--  ``Analysis``: checks Analysis passes.
--  ``Archive``: checks the Archive library.
--  ``Assembler``: checks Assembly reader/writer functionality.
--  ``Bitcode``: checks Bitcode reader/writer functionality.
--  ``CodeGen``: checks code generation and each target.
--  ``Features``: checks various features of the LLVM language.
--  ``Linker``: tests bitcode linking.
--  ``Transforms``: tests each of the scalar, IPO, and utility transforms
-   to ensure they make the right transformations.
--  ``Verifier``: tests the IR verifier.
+particular area of LLVM.
 
 Writing new regression tests
 ----------------------------
 
 The regression test structure is very simple, but does require some
 information to be set. This information is gathered via ``configure``
-and is written to a file, ``lit.site.cfg`` in ``llvm/test``. The
-``llvm/test`` Makefile does this work for you.
+and is written to a file, ``test/lit.site.cfg`` in the build directory.
+The ``llvm/test`` Makefile does this work for you.
 
 In order for the regression tests to work, each directory of tests must
-have a ``lit.local.cfg`` file. Lit looks for this file to determine how
-to run the tests. This file is just Python code and thus is very
+have a ``lit.local.cfg`` file. :program:`lit` looks for this file to determine
+how to run the tests. This file is just Python code and thus is very
 flexible, but we've standardized it for the LLVM regression tests. If
 you're adding a directory of tests, just copy ``lit.local.cfg`` from
 another directory to get running. The standard ``lit.local.cfg`` simply
@@ -209,28 +189,24 @@ specifies which files to look in for tests. Any directory that contains
 only directories does not need the ``lit.local.cfg`` file. Read the :doc:`Lit
 documentation <CommandGuide/lit>` for more information.
 
-The ``llvm-runtests`` function looks at each file that is passed to it
-and gathers any lines together that match "RUN:". These are the "RUN"
-lines that specify how the test is to be run. So, each test script must
-contain RUN lines if it is to do anything. If there are no RUN lines,
-the ``llvm-runtests`` function will issue an error and the test will
-fail.
+Each test file must contain lines starting with "RUN:" that tell :program:`lit`
+how to run it. If there are no RUN lines, :program:`lit` will issue an error
+while running a test.
 
 RUN lines are specified in the comments of the test program using the
 keyword ``RUN`` followed by a colon, and lastly the command (pipeline)
-to execute. Together, these lines form the "script" that
-``llvm-runtests`` executes to run the test case. The syntax of the RUN
-lines is similar to a shell's syntax for pipelines including I/O
-redirection and variable substitution. However, even though these lines
-may *look* like a shell script, they are not. RUN lines are interpreted
-directly by the Tcl ``exec`` command. They are never executed by a
-shell. Consequently the syntax differs from normal shell script syntax
-in a few ways. You can specify as many RUN lines as needed.
-
-lit performs substitution on each RUN line to replace LLVM tool names
+to execute. Together, these lines form the "script" that :program:`lit`
+executes to run the test case. The syntax of the RUN lines is similar to a
+shell's syntax for pipelines including I/O redirection and variable
+substitution. However, even though these lines may *look* like a shell
+script, they are not. RUN lines are interpreted by :program:`lit`.
+Consequently, the syntax differs from shell in a few ways. You can specify
+as many RUN lines as needed.
+
+:program:`lit` performs substitution on each RUN line to replace LLVM tool names
 with the full paths to the executable built for each tool (in
-$(LLVM\_OBJ\_ROOT)/$(BuildMode)/bin). This ensures that lit does not
-invoke any stray LLVM tools in the user's path during testing.
+``$(LLVM_OBJ_ROOT)/$(BuildMode)/bin)``. This ensures that :program:`lit` does
+not invoke any stray LLVM tools in the user's path during testing.
 
 Each RUN line is executed on its own, distinct from other lines unless
 its last character is ``\``. This continuation character causes the RUN
@@ -238,8 +214,8 @@ line to be concatenated with the next one. In this way you can build up
 long pipelines of commands without making huge line lengths. The lines
 ending in ``\`` are concatenated until a RUN line that doesn't end in
 ``\`` is found. This concatenated set of RUN lines then constitutes one
-execution. Tcl will substitute variables and arrange for the pipeline to
-be executed. If any process in the pipeline fails, the entire line (and
+execution. :program:`lit` will substitute variables and arrange for the pipeline
+to be executed. If any process in the pipeline fails, the entire line (and
 test case) fails too.
 
 Below is an example of legal RUN lines in a ``.ll`` file:
@@ -250,103 +226,65 @@ Below is an example of legal RUN lines in a ``.ll`` file:
     ; RUN: llvm-dis < %s.bc-13 > %t2
     ; RUN: diff %t1 %t2
 
-As with a Unix shell, the RUN: lines permit pipelines and I/O
+As with a Unix shell, the RUN lines permit pipelines and I/O
 redirection to be used. However, the usage is slightly different than
-for Bash. To check what's legal, see the documentation for the `Tcl
-exec <http://www.tcl.tk/man/tcl8.5/TclCmd/exec.htm#M2>`_ command and the
-`tutorial <http://www.tcl.tk/man/tcl8.5/tutorial/Tcl26.html>`_. The
-major differences are:
-
--  You can't do ``2>&1``. That will cause Tcl to write to a file named
-   ``&1``. Usually this is done to get stderr to go through a pipe. You
-   can do that in tcl with ``|&`` so replace this idiom:
+for Bash. In general, it's useful to read the code of other tests to figure out
+what you can use in yours. The major differences are:
+
+-  You can't do ``2>&1``. That will cause :program:`lit` to write to a file
+   named ``&1``. Usually this is done to get stderr to go through a pipe. You
+   can do that with ``|&`` so replace this idiom:
    ``... 2>&1 | grep`` with ``... |& grep``
 -  You can only redirect to a file, not to another descriptor and not
    from a here document.
--  tcl supports redirecting to open files with the @ syntax but you
-   shouldn't use that here.
 
 There are some quoting rules that you must pay attention to when writing
-your RUN lines. In general nothing needs to be quoted. Tcl won't strip
-off any quote characters so they will get passed to the invoked program.
+your RUN lines. In general nothing needs to be quoted. :program:`lit` won't
+strip off any quote characters so they will get passed to the invoked program.
 For example:
 
 .. code-block:: bash
 
     ... | grep 'find this string'
 
-This will fail because the ' characters are passed to grep. This would
-instruction grep to look for ``'find`` in the files ``this`` and
-``string'``. To avoid this use curly braces to tell Tcl that it should
-treat everything enclosed as one value. So our example would become:
+This will fail because the ``'`` characters are passed to ``grep``. This would
+make ``grep`` to look for ``'find`` in the files ``this`` and
+``string'``. To avoid this use curly braces to tell :program:`lit` that it
+should treat everything enclosed as one value. So our example would become:
 
 .. code-block:: bash
 
     ... | grep {find this string}
 
-Additionally, the characters ``[`` and ``]`` are treated specially by
-Tcl. They tell Tcl to interpret the content as a command to execute.
-Since these characters are often used in regular expressions this can
-have disastrous results and cause the entire test run in a directory to
-fail. For example, a common idiom is to look for some basicblock number:
-
-.. code-block:: bash
-
-    ... | grep bb[2-8]
-
-This, however, will cause Tcl to fail because its going to try to
-execute a program named "2-8". Instead, what you want is this:
-
-.. code-block:: bash
-
-    ... | grep {bb\[2-8\]}
-
-Finally, if you need to pass the ``\`` character down to a program, then
-it must be doubled. This is another Tcl special character. So, suppose
-you had:
-
-.. code-block:: bash
-
-    ... | grep 'i32\*'
-
-This will fail to match what you want (a pointer to i32). First, the
-``'`` do not get stripped off. Second, the ``\`` gets stripped off by
-Tcl so what grep sees is: ``'i32*'``. That's not likely to match
-anything. To resolve this you must use ``\\`` and the ``{}``, like this:
-
-.. code-block:: bash
-
-    ... | grep {i32\\*}
-
-If your system includes GNU ``grep``, make sure that ``GREP_OPTIONS`` is
-not set in your environment. Otherwise, you may get invalid results
-(both false positives and false negatives).
+In general, you should strive to keep your RUN lines as simple as possible,
+using them only to run tools that generate the output you can then examine. The
+recommended way to examine output to figure out if the test passes it using the
+:doc:`FileCheck tool <CommandGuide/FileCheck>`. The usage of ``grep`` in RUN
+lines is discouraged.
 
 The FileCheck utility
 ---------------------
 
-A powerful feature of the RUN: lines is that it allows any arbitrary
+A powerful feature of the RUN lines is that it allows any arbitrary
 commands to be executed as part of the test harness. While standard
-(portable) unix tools like 'grep' work fine on run lines, as you see
-above, there are a lot of caveats due to interaction with Tcl syntax,
+(portable) unix tools like ``grep`` work fine on run lines, as you see
+above, there are a lot of caveats due to interaction with shell syntax,
 and we want to make sure the run lines are portable to a wide range of
-systems. Another major problem is that grep is not very good at checking
+systems. Another major problem is that ``grep`` is not very good at checking
 to verify that the output of a tools contains a series of different
-output in a specific order. The FileCheck tool was designed to help with
-these problems.
+output in a specific order. The :program:`FileCheck` tool was designed to
+help with these problems.
 
-FileCheck is designed to read a file to check from standard input, and the set
-of things to verify from a file specified as a command line argument.
-FileCheck is described in :doc:`the FileCheck man page
+:program:`FileCheck` is designed to read a file to check from standard input,
+and the set of things to verify from a file specified as a command line
+argument. :program:`FileCheck` is described in :doc:`the FileCheck man page
 <CommandGuide/FileCheck>`.
 
 Variables and substitutions
 ---------------------------
 
 With a RUN line there are a number of substitutions that are permitted.
-In general, any Tcl variable that is available in the ``substitute``
-function (in ``test/lib/llvm.exp``) can be substituted into a RUN line.
-To make a substitution just write the variable's name preceded by a $.
+To make a substitution just write the variable's name preceded by a ``$``.
 Additionally, for compatibility reasons with previous versions of the
 test library, certain names can be accessed with an alternate syntax: a
 % prefix. These alternates are deprecated and may go away in a future
@@ -357,15 +295,15 @@ parentheses.
 
 ``$test`` (``%s``)
    The full path to the test case's source. This is suitable for passing on
-   the command line as the input to an llvm tool.
+   the command line as the input to an LLVM tool.
 
 ``%(line)``, ``%(line+<number>)``, ``%(line-<number>)``
    The number of the line where this variable is used, with an optional
-   integer offset. This can be used in tests with multiple RUN: lines,
+   integer offset. This can be used in tests with multiple RUN lines,
    which reference test file's line numbers.
 
 ``$srcdir``
-   The source directory from where the "``make check``" was run.
+   The source directory from where the ``make check`` was run.
 
 ``objdir``
    The object directory that corresponds to the ``$srcdir``.
@@ -398,23 +336,18 @@ parentheses.
 
 ``link`` (``%link``)
    This full link command used to link LLVM executables. This has all the
-   configured -I, -L and -l options.
+   configured ``-I``, ``-L`` and ``-l`` options.
 
 ``shlibext`` (``%shlibext``)
-   The suffix for the host platforms share library (dll) files. This
+   The suffix for the host platforms shared library (DLL) files. This
    includes the period as the first character.
 
-To add more variables, two things need to be changed. First, add a line
-in the ``test/Makefile`` that creates the ``site.exp`` file. This will
-"set" the variable as a global in the site.exp file. Second, in the
-``test/lib/llvm.exp`` file, in the substitute proc, add the variable
-name to the list of "global" declarations at the beginning of the proc.
-That's it, the variable can then be used in test scripts.
+To add more variables, look at ``test/lit.cfg``.
 
 Other Features
 --------------
 
-To make RUN line writing easier, there are several shell scripts located
+To make RUN line writing easier, there are several helper scripts and programs
 in the ``llvm/test/Scripts`` directory. This directory is in the PATH
 when running tests, so you can just call these scripts using their name.
 For example:
@@ -428,9 +361,7 @@ For example:
    purposefully ignoring the result code of the tool
 ``not``
    This script runs its arguments and then inverts the result code from it.
-   Zero result codes become 1. Non-zero result codes become 0. This is
-   useful to invert the result of a grep. For example "not grep X" means
-   succeed only if you don't find X in the input.
+   Zero result codes become 1. Non-zero result codes become 0.
 
 Sometimes it is necessary to mark a test case as "expected fail" or
 XFAIL. You can easily mark a test as XFAIL just by including ``XFAIL:``
@@ -450,7 +381,7 @@ line:
 
     ; XFAIL: darwin,sun
 
-To make the output more useful, the ``llvm_runtest`` function wil scan
+To make the output more useful, :program:`lit` will scan
 the lines of the test case for ones that contain a pattern that matches
 ``PR[0-9]+``. This is the syntax for specifying a PR (Problem Report) number
 that is related to the test case. The number after "PR" specifies the
diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html
deleted file mode 100644
index 0ad472cb92..0000000000
--- a/docs/WritingAnLLVMBackend.html
+++ /dev/null
@@ -1,2557 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <title>Writing an LLVM Compiler Backend</title>
-  <link rel="stylesheet" href="_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>
-  Writing an LLVM Compiler Backend
-</h1>
-
-<ol>
-  <li><a href="#intro">Introduction</a>
-  <ul>
-    <li><a href="#Audience">Audience</a></li>
-    <li><a href="#Prerequisite">Prerequisite Reading</a></li>
-    <li><a href="#Basic">Basic Steps</a></li>
-    <li><a href="#Preliminaries">Preliminaries</a></li>
-  </ul>
-  <li><a href="#TargetMachine">Target Machine</a></li>
-  <li><a href="#TargetRegistration">Target Registration</a></li>
-  <li><a href="#RegisterSet">Register Set and Register Classes</a>
-  <ul>
-    <li><a href="#RegisterDef">Defining a Register</a></li>
-    <li><a href="#RegisterClassDef">Defining a Register Class</a></li>
-    <li><a href="#implementRegister">Implement a subclass of TargetRegisterInfo</a></li>
-  </ul></li>
-  <li><a href="#InstructionSet">Instruction Set</a>
-  <ul>  
-    <li><a href="#operandMapping">Instruction Operand Mapping</a></li>
-    <li><a href="#relationMapping">Instruction Relation Mapping</a></li>
-    <li><a href="#implementInstr">Implement a subclass of TargetInstrInfo</a></li>
-    <li><a href="#branchFolding">Branch Folding and If Conversion</a></li>
-  </ul></li>
-  <li><a href="#InstructionSelector">Instruction Selector</a>
-  <ul>
-    <li><a href="#LegalizePhase">The SelectionDAG Legalize Phase</a>
-    <ul>
-      <li><a href="#promote">Promote</a></li> 
-      <li><a href="#expand">Expand</a></li> 
-      <li><a href="#custom">Custom</a></li> 
-      <li><a href="#legal">Legal</a></li>       
-    </ul></li>
-    <li><a href="#callingConventions">Calling Conventions</a></li>     
-  </ul></li>
-  <li><a href="#assemblyPrinter">Assembly Printer</a></li> 
-  <li><a href="#subtargetSupport">Subtarget Support</a></li> 
-  <li><a href="#jitSupport">JIT Support</a>
-  <ul>  
-    <li><a href="#mce">Machine Code Emitter</a></li>   
-    <li><a href="#targetJITInfo">Target JIT Info</a></li>   
-  </ul></li>
-</ol>
-
-<div class="doc_author">    
-  <p>Written by <a href="http://www.woo.com">Mason Woo</a> and
-                <a href="http://misha.brukman.net">Misha Brukman</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="intro">Introduction</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-This document describes techniques for writing compiler backends that convert
-the LLVM Intermediate Representation (IR) to code for a specified machine or
-other languages. Code intended for a specific machine can take the form of
-either assembly code or binary code (usable for a JIT compiler).
-</p>
-
-<p>
-The backend of LLVM features a target-independent code generator that may create
-output for several types of target CPUs &mdash; including X86, PowerPC, ARM,
-and SPARC. The backend may also be used to generate code targeted at SPUs of the
-Cell processor or GPUs to support the execution of compute kernels.
-</p>
-
-<p>
-The document focuses on existing examples found in subdirectories
-of <tt>llvm/lib/Target</tt> in a downloaded LLVM release. In particular, this
-document focuses on the example of creating a static compiler (one that emits
-text assembly) for a SPARC target, because SPARC has fairly standard
-characteristics, such as a RISC instruction set and straightforward calling
-conventions.
-</p>
-
-<h3>
-  <a name="Audience">Audience</a>
-</h3>
-
-<div>
-
-<p>
-The audience for this document is anyone who needs to write an LLVM backend to
-generate code for a specific hardware or software target.
-</p>
-
-</div>
-
-<h3>
-  <a name="Prerequisite">Prerequisite Reading</a>
-</h3>
-
-<div>  
-
-<p>
-These essential documents must be read before reading this document:
-</p>
-
-<ul>
-<li><i><a href="LangRef.html">LLVM Language Reference
-    Manual</a></i> &mdash; a reference manual for the LLVM assembly language.</li>
-
-<li><i><a href="CodeGenerator.html">The LLVM
-    Target-Independent Code Generator</a></i> &mdash; a guide to the components
-    (classes and code generation algorithms) for translating the LLVM internal
-    representation into machine code for a specified target.  Pay particular
-    attention to the descriptions of code generation stages: Instruction
-    Selection, Scheduling and Formation, SSA-based Optimization, Register
-    Allocation, Prolog/Epilog Code Insertion, Late Machine Code Optimizations,
-    and Code Emission.</li>
-
-<li><i><a href="TableGenFundamentals.html">TableGen
-    Fundamentals</a></i> &mdash;a document that describes the TableGen
-    (<tt>tblgen</tt>) application that manages domain-specific information to
-    support LLVM code generation. TableGen processes input from a target
-    description file (<tt>.td</tt> suffix) and generates C++ code that can be
-    used for code generation.</li>
-
-<li><i><a href="WritingAnLLVMPass.html">Writing an LLVM
-    Pass</a></i> &mdash; The assembly printer is a <tt>FunctionPass</tt>, as are
-    several SelectionDAG processing steps.</li>
-</ul>
-
-<p>
-To follow the SPARC examples in this document, have a copy of
-<i><a href="http://www.sparc.org/standards/V8.pdf">The SPARC Architecture
-Manual, Version 8</a></i> for reference. For details about the ARM instruction
-set, refer to the <i><a href="http://infocenter.arm.com/">ARM Architecture
-Reference Manual</a></i>. For more about the GNU Assembler format
-(<tt>GAS</tt>), see
-<i><a href="http://sourceware.org/binutils/docs/as/index.html">Using As</a></i>,
-especially for the assembly printer. <i>Using As</i> contains a list of target
-machine dependent features.
-</p>
-
-</div>
-
-<h3>
-  <a name="Basic">Basic Steps</a>
-</h3>
-
-<div>
-
-<p>
-To write a compiler backend for LLVM that converts the LLVM IR to code for a
-specified target (machine or other language), follow these steps:
-</p>
-
-<ul>
-<li>Create a subclass of the TargetMachine class that describes characteristics
-    of your target machine. Copy existing examples of specific TargetMachine
-    class and header files; for example, start with
-    <tt>SparcTargetMachine.cpp</tt> and <tt>SparcTargetMachine.h</tt>, but
-    change the file names for your target. Similarly, change code that
-    references "Sparc" to reference your target. </li>
-
-<li>Describe the register set of the target. Use TableGen to generate code for
-    register definition, register aliases, and register classes from a
-    target-specific <tt>RegisterInfo.td</tt> input file. You should also write
-    additional code for a subclass of the TargetRegisterInfo class that
-    represents the class register file data used for register allocation and
-    also describes the interactions between registers.</li>
-
-<li>Describe the instruction set of the target. Use TableGen to generate code
-    for target-specific instructions from target-specific versions of
-    <tt>TargetInstrFormats.td</tt> and <tt>TargetInstrInfo.td</tt>. You should
-    write additional code for a subclass of the TargetInstrInfo class to
-    represent machine instructions supported by the target machine. </li>
-
-<li>Describe the selection and conversion of the LLVM IR from a Directed Acyclic
-    Graph (DAG) representation of instructions to native target-specific
-    instructions. Use TableGen to generate code that matches patterns and
-    selects instructions based on additional information in a target-specific
-    version of <tt>TargetInstrInfo.td</tt>. Write code
-    for <tt>XXXISelDAGToDAG.cpp</tt>, where XXX identifies the specific target,
-    to perform pattern matching and DAG-to-DAG instruction selection. Also write
-    code in <tt>XXXISelLowering.cpp</tt> to replace or remove operations and
-    data types that are not supported natively in a SelectionDAG. </li>
-
-<li>Write code for an assembly printer that converts LLVM IR to a GAS format for
-    your target machine.  You should add assembly strings to the instructions
-    defined in your target-specific version of <tt>TargetInstrInfo.td</tt>. You
-    should also write code for a subclass of AsmPrinter that performs the
-    LLVM-to-assembly conversion and a trivial subclass of TargetAsmInfo.</li>
-
-<li>Optionally, add support for subtargets (i.e., variants with different
-    capabilities). You should also write code for a subclass of the
-    TargetSubtarget class, which allows you to use the <tt>-mcpu=</tt>
-    and <tt>-mattr=</tt> command-line options.</li>
-
-<li>Optionally, add JIT support and create a machine code emitter (subclass of
-    TargetJITInfo) that is used to emit binary code directly into memory. </li>
-</ul>
-
-<p>
-In the <tt>.cpp</tt> and <tt>.h</tt>. files, initially stub up these methods and
-then implement them later. Initially, you may not know which private members
-that the class will need and which components will need to be subclassed.
-</p>
-
-</div>
-
-<h3>
-  <a name="Preliminaries">Preliminaries</a>
-</h3>
-
-<div>
-
-<p>
-To actually create your compiler backend, you need to create and modify a few
-files. The absolute minimum is discussed here. But to actually use the LLVM
-target-independent code generator, you must perform the steps described in
-the <a href="CodeGenerator.html">LLVM
-Target-Independent Code Generator</a> document.
-</p>
-
-<p>
-First, you should create a subdirectory under <tt>lib/Target</tt> to hold all
-the files related to your target. If your target is called "Dummy," create the
-directory <tt>lib/Target/Dummy</tt>.
-</p>
-
-<p>
-In this new
-directory, create a <tt>Makefile</tt>. It is easiest to copy a
-<tt>Makefile</tt> of another target and modify it. It should at least contain
-the <tt>LEVEL</tt>, <tt>LIBRARYNAME</tt> and <tt>TARGET</tt> variables, and then
-include <tt>$(LEVEL)/Makefile.common</tt>. The library can be
-named <tt>LLVMDummy</tt> (for example, see the MIPS target). Alternatively, you
-can split the library into <tt>LLVMDummyCodeGen</tt>
-and <tt>LLVMDummyAsmPrinter</tt>, the latter of which should be implemented in a
-subdirectory below <tt>lib/Target/Dummy</tt> (for example, see the PowerPC
-target).
-</p>
-
-<p>
-Note that these two naming schemes are hardcoded into <tt>llvm-config</tt>.
-Using any other naming scheme will confuse <tt>llvm-config</tt> and produce a
-lot of (seemingly unrelated) linker errors when linking <tt>llc</tt>.
-</p>
-
-<p>
-To make your target actually do something, you need to implement a subclass of
-<tt>TargetMachine</tt>. This implementation should typically be in the file
-<tt>lib/Target/DummyTargetMachine.cpp</tt>, but any file in
-the <tt>lib/Target</tt> directory will be built and should work. To use LLVM's
-target independent code generator, you should do what all current machine
-backends do: create a subclass of <tt>LLVMTargetMachine</tt>. (To create a
-target from scratch, create a subclass of <tt>TargetMachine</tt>.)
-</p>
-
-<p>
-To get LLVM to actually build and link your target, you need to add it to
-the <tt>TARGETS_TO_BUILD</tt> variable. To do this, you modify the configure
-script to know about your target when parsing the <tt>--enable-targets</tt>
-option. Search the configure script for <tt>TARGETS_TO_BUILD</tt>, add your
-target to the lists there (some creativity required), and then
-reconfigure. Alternatively, you can change <tt>autotools/configure.ac</tt> and
-regenerate configure by running <tt>./autoconf/AutoRegen.sh</tt>.
-</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="TargetMachine">Target Machine</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-<tt>LLVMTargetMachine</tt> is designed as a base class for targets implemented
-with the LLVM target-independent code generator. The <tt>LLVMTargetMachine</tt>
-class should be specialized by a concrete target class that implements the
-various virtual methods. <tt>LLVMTargetMachine</tt> is defined as a subclass of
-<tt>TargetMachine</tt> in <tt>include/llvm/Target/TargetMachine.h</tt>. The
-<tt>TargetMachine</tt> class implementation (<tt>TargetMachine.cpp</tt>) also
-processes numerous command-line options.
-</p>
-
-<p>
-To create a concrete target-specific subclass of <tt>LLVMTargetMachine</tt>,
-start by copying an existing <tt>TargetMachine</tt> class and header.  You
-should name the files that you create to reflect your specific target. For
-instance, for the SPARC target, name the files <tt>SparcTargetMachine.h</tt> and
-<tt>SparcTargetMachine.cpp</tt>.
-</p>
-
-<p>
-For a target machine <tt>XXX</tt>, the implementation of
-<tt>XXXTargetMachine</tt> must have access methods to obtain objects that
-represent target components.  These methods are named <tt>get*Info</tt>, and are
-intended to obtain the instruction set (<tt>getInstrInfo</tt>), register set
-(<tt>getRegisterInfo</tt>), stack frame layout (<tt>getFrameInfo</tt>), and
-similar information. <tt>XXXTargetMachine</tt> must also implement the
-<tt>getDataLayout</tt> method to access an object with target-specific data
-characteristics, such as data type size and alignment requirements.
-</p>
-
-<p>
-For instance, for the SPARC target, the header file
-<tt>SparcTargetMachine.h</tt> declares prototypes for several <tt>get*Info</tt>
-and <tt>getDataLayout</tt> methods that simply return a class member.
-</p>
-
-<div class="doc_code">
-<pre>
-namespace llvm {
-
-class Module;
-
-class SparcTargetMachine : public LLVMTargetMachine {
-  const DataLayout DataLayout;       // Calculates type size &amp; alignment
-  SparcSubtarget Subtarget;
-  SparcInstrInfo InstrInfo;
-  TargetFrameInfo FrameInfo;
-  
-protected:
-  virtual const TargetAsmInfo *createTargetAsmInfo() const;
-  
-public:
-  SparcTargetMachine(const Module &amp;M, const std::string &amp;FS);
-
-  virtual const SparcInstrInfo *getInstrInfo() const {return &amp;InstrInfo; }
-  virtual const TargetFrameInfo *getFrameInfo() const {return &amp;FrameInfo; }
-  virtual const TargetSubtarget *getSubtargetImpl() const{return &amp;Subtarget; }
-  virtual const TargetRegisterInfo *getRegisterInfo() const {
-    return &amp;InstrInfo.getRegisterInfo();
-  }
-  virtual const DataLayout *getDataLayout() const { return &amp;DataLayout; }
-  static unsigned getModuleMatchQuality(const Module &amp;M);
-
-  // Pass Pipeline Configuration
-  virtual bool addInstSelector(PassManagerBase &amp;PM, bool Fast);
-  virtual bool addPreEmitPass(PassManagerBase &amp;PM, bool Fast);
-};
-
-} // end namespace llvm
-</pre>
-</div>
-
-<ul>
-<li><tt>getInstrInfo()</tt></li>
-<li><tt>getRegisterInfo()</tt></li>
-<li><tt>getFrameInfo()</tt></li>
-<li><tt>getDataLayout()</tt></li>
-<li><tt>getSubtargetImpl()</tt></li>
-</ul>
-
-<p>For some targets, you also need to support the following methods:</p>
-
-<ul>
-<li><tt>getTargetLowering()</tt></li>
-<li><tt>getJITInfo()</tt></li>
-</ul>
-
-<p>
-In addition, the <tt>XXXTargetMachine</tt> constructor should specify a
-<tt>TargetDescription</tt> string that determines the data layout for the target
-machine, including characteristics such as pointer size, alignment, and
-endianness. For example, the constructor for SparcTargetMachine contains the
-following:
-</p>
-
-<div class="doc_code">
-<pre>
-SparcTargetMachine::SparcTargetMachine(const Module &amp;M, const std::string &amp;FS)
-  : DataLayout("E-p:32:32-f128:128:128"),
-    Subtarget(M, FS), InstrInfo(Subtarget),
-    FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
-}
-</pre>
-</div>
-
-<p>Hyphens separate portions of the <tt>TargetDescription</tt> string.</p>
-
-<ul>
-<li>An upper-case "<tt>E</tt>" in the string indicates a big-endian target data
-    model. a lower-case "<tt>e</tt>" indicates little-endian.</li>
-
-<li>"<tt>p:</tt>" is followed by pointer information: size, ABI alignment, and
-    preferred alignment. If only two figures follow "<tt>p:</tt>", then the
-    first value is pointer size, and the second value is both ABI and preferred
-    alignment.</li>
-
-<li>Then a letter for numeric type alignment: "<tt>i</tt>", "<tt>f</tt>",
-    "<tt>v</tt>", or "<tt>a</tt>" (corresponding to integer, floating point,
-    vector, or aggregate). "<tt>i</tt>", "<tt>v</tt>", or "<tt>a</tt>" are
-    followed by ABI alignment and preferred alignment. "<tt>f</tt>" is followed
-    by three values: the first indicates the size of a long double, then ABI
-    alignment, and then ABI preferred alignment.</li>
-</ul>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="TargetRegistration">Target Registration</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-You must also register your target with the <tt>TargetRegistry</tt>, which is
-what other LLVM tools use to be able to lookup and use your target at
-runtime. The <tt>TargetRegistry</tt> can be used directly, but for most targets
-there are helper templates which should take care of the work for you.</p>
-
-<p>
-All targets should declare a global <tt>Target</tt> object which is used to
-represent the target during registration. Then, in the target's TargetInfo
-library, the target should define that object and use
-the <tt>RegisterTarget</tt> template to register the target. For example, the Sparc registration code looks like this:
-</p>
-
-<div class="doc_code">
-<pre>
-Target llvm::TheSparcTarget;
-
-extern "C" void LLVMInitializeSparcTargetInfo() { 
-  RegisterTarget&lt;Triple::sparc, /*HasJIT=*/false&gt;
-    X(TheSparcTarget, "sparc", "Sparc");
-}
-</pre>
-</div>
-
-<p>
-This allows the <tt>TargetRegistry</tt> to look up the target by name or by
-target triple. In addition, most targets will also register additional features
-which are available in separate libraries. These registration steps are
-separate, because some clients may wish to only link in some parts of the target
--- the JIT code generator does not require the use of the assembler printer, for
-example. Here is an example of registering the Sparc assembly printer:
-</p>
-
-<div class="doc_code">
-<pre>
-extern "C" void LLVMInitializeSparcAsmPrinter() { 
-  RegisterAsmPrinter&lt;SparcAsmPrinter&gt; X(TheSparcTarget);
-}
-</pre>
-</div>
-
-<p>
-For more information, see
-"<a href="/doxygen/TargetRegistry_8h-source.html">llvm/Target/TargetRegistry.h</a>".
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="RegisterSet">Register Set and Register Classes</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-You should describe a concrete target-specific class that represents the
-register file of a target machine. This class is called <tt>XXXRegisterInfo</tt>
-(where <tt>XXX</tt> identifies the target) and represents the class register
-file data that is used for register allocation. It also describes the
-interactions between registers.
-</p>
-
-<p>
-You also need to define register classes to categorize related registers. A
-register class should be added for groups of registers that are all treated the
-same way for some instruction. Typical examples are register classes for
-integer, floating-point, or vector registers. A register allocator allows an
-instruction to use any register in a specified register class to perform the
-instruction in a similar manner. Register classes allocate virtual registers to
-instructions from these sets, and register classes let the target-independent
-register allocator automatically choose the actual registers.
-</p>
-
-<p>
-Much of the code for registers, including register definition, register aliases,
-and register classes, is generated by TableGen from <tt>XXXRegisterInfo.td</tt>
-input files and placed in <tt>XXXGenRegisterInfo.h.inc</tt> and
-<tt>XXXGenRegisterInfo.inc</tt> output files. Some of the code in the
-implementation of <tt>XXXRegisterInfo</tt> requires hand-coding.
-</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="RegisterDef">Defining a Register</a>
-</h3>
-
-<div>
-
-<p>
-The <tt>XXXRegisterInfo.td</tt> file typically starts with register definitions
-for a target machine. The <tt>Register</tt> class (specified
-in <tt>Target.td</tt>) is used to define an object for each register. The
-specified string <tt>n</tt> becomes the <tt>Name</tt> of the register. The
-basic <tt>Register</tt> object does not have any subregisters and does not
-specify any aliases.
-</p>
-
-<div class="doc_code">
-<pre>
-class Register&lt;string n&gt; {
-  string Namespace = "";
-  string AsmName = n;
-  string Name = n;
-  int SpillSize = 0;
-  int SpillAlignment = 0;
-  list&lt;Register&gt; Aliases = [];
-  list&lt;Register&gt; SubRegs = [];
-  list&lt;int&gt; DwarfNumbers = [];
-}
-</pre>
-</div>
-
-<p>
-For example, in the <tt>X86RegisterInfo.td</tt> file, there are register
-definitions that utilize the Register class, such as:
-</p>
-
-<div class="doc_code">
-<pre>
-def AL : Register&lt;"AL"&gt;, DwarfRegNum&lt;[0, 0, 0]&gt;;
-</pre>
-</div>
-
-<p>
-This defines the register <tt>AL</tt> and assigns it values (with
-<tt>DwarfRegNum</tt>) that are used by <tt>gcc</tt>, <tt>gdb</tt>, or a debug
-information writer to identify a register. For register
-<tt>AL</tt>, <tt>DwarfRegNum</tt> takes an array of 3 values representing 3
-different modes: the first element is for X86-64, the second for exception
-handling (EH) on X86-32, and the third is generic. -1 is a special Dwarf number
-that indicates the gcc number is undefined, and -2 indicates the register number
-is invalid for this mode.
-</p>
-
-<p>
-From the previously described line in the <tt>X86RegisterInfo.td</tt> file,
-TableGen generates this code in the <tt>X86GenRegisterInfo.inc</tt> file:
-</p>
-
-<div class="doc_code">
-<pre>
-static const unsigned GR8[] = { X86::AL, ... };
-
-const unsigned AL_AliasSet[] = { X86::AX, X86::EAX, X86::RAX, 0 };
-
-const TargetRegisterDesc RegisterDescriptors[] = { 
-  ...
-{ "AL", "AL", AL_AliasSet, Empty_SubRegsSet, Empty_SubRegsSet, AL_SuperRegsSet }, ...
-</pre>
-</div>
-
-<p>
-From the register info file, TableGen generates a <tt>TargetRegisterDesc</tt>
-object for each register. <tt>TargetRegisterDesc</tt> is defined in
-<tt>include/llvm/Target/TargetRegisterInfo.h</tt> with the following fields:
-</p>
-
-<div class="doc_code">
-<pre>
-struct TargetRegisterDesc {
-  const char     *AsmName;      // Assembly language name for the register
-  const char     *Name;         // Printable name for the reg (for debugging)
-  const unsigned *AliasSet;     // Register Alias Set
-  const unsigned *SubRegs;      // Sub-register set
-  const unsigned *ImmSubRegs;   // Immediate sub-register set
-  const unsigned *SuperRegs;    // Super-register set
-};</pre>
-</div>
-
-<p>
-TableGen uses the entire target description file (<tt>.td</tt>) to determine
-text names for the register (in the <tt>AsmName</tt> and <tt>Name</tt> fields of
-<tt>TargetRegisterDesc</tt>) and the relationships of other registers to the
-defined register (in the other <tt>TargetRegisterDesc</tt> fields). In this
-example, other definitions establish the registers "<tt>AX</tt>",
-"<tt>EAX</tt>", and "<tt>RAX</tt>" as aliases for one another, so TableGen
-generates a null-terminated array (<tt>AL_AliasSet</tt>) for this register alias
-set.
-</p>
-
-<p>
-The <tt>Register</tt> class is commonly used as a base class for more complex
-classes. In <tt>Target.td</tt>, the <tt>Register</tt> class is the base for the
-<tt>RegisterWithSubRegs</tt> class that is used to define registers that need to
-specify subregisters in the <tt>SubRegs</tt> list, as shown here:
-</p>
-
-<div class="doc_code">
-<pre>
-class RegisterWithSubRegs&lt;string n,
-list&lt;Register&gt; subregs&gt; : Register&lt;n&gt; {
-  let SubRegs = subregs;
-}
-</pre>
-</div>
-
-<p>
-In <tt>SparcRegisterInfo.td</tt>, additional register classes are defined for
-SPARC: a Register subclass, SparcReg, and further subclasses: <tt>Ri</tt>,
-<tt>Rf</tt>, and <tt>Rd</tt>. SPARC registers are identified by 5-bit ID
-numbers, which is a feature common to these subclasses. Note the use of
-'<tt>let</tt>' expressions to override values that are initially defined in a
-superclass (such as <tt>SubRegs</tt> field in the <tt>Rd</tt> class).
-</p>
-
-<div class="doc_code">
-<pre>
-class SparcReg&lt;string n&gt; : Register&lt;n&gt; {
-  field bits&lt;5&gt; Num;
-  let Namespace = "SP";
-}
-// Ri - 32-bit integer registers
-class Ri&lt;bits&lt;5&gt; num, string n&gt; :
-SparcReg&lt;n&gt; {
-  let Num = num;
-}
-// Rf - 32-bit floating-point registers
-class Rf&lt;bits&lt;5&gt; num, string n&gt; :
-SparcReg&lt;n&gt; {
-  let Num = num;
-}
-// Rd - Slots in the FP register file for 64-bit
-floating-point values.
-class Rd&lt;bits&lt;5&gt; num, string n,
-list&lt;Register&gt; subregs&gt; : SparcReg&lt;n&gt; {
-  let Num = num;
-  let SubRegs = subregs;
-}
-</pre>
-</div>
-
-<p>
-In the <tt>SparcRegisterInfo.td</tt> file, there are register definitions that
-utilize these subclasses of <tt>Register</tt>, such as:
-</p>
-
-<div class="doc_code">
-<pre>
-def G0 : Ri&lt; 0, "G0"&gt;,
-DwarfRegNum&lt;[0]&gt;;
-def G1 : Ri&lt; 1, "G1"&gt;, DwarfRegNum&lt;[1]&gt;;
-...
-def F0 : Rf&lt; 0, "F0"&gt;,
-DwarfRegNum&lt;[32]&gt;;
-def F1 : Rf&lt; 1, "F1"&gt;,
-DwarfRegNum&lt;[33]&gt;;
-...
-def D0 : Rd&lt; 0, "F0", [F0, F1]&gt;,
-DwarfRegNum&lt;[32]&gt;;
-def D1 : Rd&lt; 2, "F2", [F2, F3]&gt;,
-DwarfRegNum&lt;[34]&gt;;
-</pre>
-</div>
-
-<p>
-The last two registers shown above (<tt>D0</tt> and <tt>D1</tt>) are
-double-precision floating-point registers that are aliases for pairs of
-single-precision floating-point sub-registers. In addition to aliases, the
-sub-register and super-register relationships of the defined register are in
-fields of a register's TargetRegisterDesc.
-</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="RegisterClassDef">Defining a Register Class</a>
-</h3>
-
-<div>
-
-<p>
-The <tt>RegisterClass</tt> class (specified in <tt>Target.td</tt>) is used to
-define an object that represents a group of related registers and also defines
-the default allocation order of the registers. A target description file
-<tt>XXXRegisterInfo.td</tt> that uses <tt>Target.td</tt> can construct register
-classes using the following class:
-</p>
-
-<div class="doc_code">
-<pre>
-class RegisterClass&lt;string namespace,
-list&lt;ValueType&gt; regTypes, int alignment, dag regList&gt; {
-  string Namespace = namespace;
-  list&lt;ValueType&gt; RegTypes = regTypes;
-  int Size = 0;  // spill size, in bits; zero lets tblgen pick the size
-  int Alignment = alignment;
-
-  // CopyCost is the cost of copying a value between two registers
-  // default value 1 means a single instruction
-  // A negative value means copying is extremely expensive or impossible
-  int CopyCost = 1;  
-  dag MemberList = regList;
-  
-  // for register classes that are subregisters of this class
-  list&lt;RegisterClass&gt; SubRegClassList = [];  
-  
-  code MethodProtos = [{}];  // to insert arbitrary code
-  code MethodBodies = [{}];
-}
-</pre>
-</div>
-
-<p>To define a RegisterClass, use the following 4 arguments:</p>
-
-<ul>
-<li>The first argument of the definition is the name of the namespace.</li>
-
-<li>The second argument is a list of <tt>ValueType</tt> register type values
-    that are defined in <tt>include/llvm/CodeGen/ValueTypes.td</tt>. Defined
-    values include integer types (such as <tt>i16</tt>, <tt>i32</tt>,
-    and <tt>i1</tt> for Boolean), floating-point types
-    (<tt>f32</tt>, <tt>f64</tt>), and vector types (for example, <tt>v8i16</tt>
-    for an <tt>8 x i16</tt> vector). All registers in a <tt>RegisterClass</tt>
-    must have the same <tt>ValueType</tt>, but some registers may store vector
-    data in different configurations. For example a register that can process a
-    128-bit vector may be able to handle 16 8-bit integer elements, 8 16-bit
-    integers, 4 32-bit integers, and so on. </li>
-
-<li>The third argument of the <tt>RegisterClass</tt> definition specifies the
-    alignment required of the registers when they are stored or loaded to
-    memory.</li>
-
-<li>The final argument, <tt>regList</tt>, specifies which registers are in this
-    class. If an alternative allocation order method is not specified, then
-    <tt>regList</tt> also defines the order of allocation used by the register
-    allocator. Besides simply listing registers with <tt>(add R0, R1, ...)</tt>,
-    more advanced set operators are available. See
-    <tt>include/llvm/Target/Target.td</tt> for more information.</li>
-</ul>
-
-<p>
-In <tt>SparcRegisterInfo.td</tt>, three RegisterClass objects are defined:
-<tt>FPRegs</tt>, <tt>DFPRegs</tt>, and <tt>IntRegs</tt>. For all three register
-classes, the first argument defines the namespace with the string
-'<tt>SP</tt>'. <tt>FPRegs</tt> defines a group of 32 single-precision
-floating-point registers (<tt>F0</tt> to <tt>F31</tt>); <tt>DFPRegs</tt> defines
-a group of 16 double-precision registers
-(<tt>D0-D15</tt>).
-</p>
-
-<div class="doc_code">
-<pre>
-// F0, F1, F2, ..., F31
-def FPRegs : RegisterClass&lt;"SP", [f32], 32, (sequence "F%u", 0, 31)&gt;;
-
-def DFPRegs : RegisterClass&lt;"SP", [f64], 64,
-                            (add D0, D1, D2, D3, D4, D5, D6, D7, D8,
-                                 D9, D10, D11, D12, D13, D14, D15)&gt;;
-&nbsp;
-def IntRegs : RegisterClass&lt;"SP", [i32], 32,
-    (add L0, L1, L2, L3, L4, L5, L6, L7,
-         I0, I1, I2, I3, I4, I5,
-         O0, O1, O2, O3, O4, O5, O7,
-         G1,
-         // Non-allocatable regs:
-         G2, G3, G4,
-         O6,        // stack ptr
-         I6,        // frame ptr
-         I7,        // return address
-         G0,        // constant zero
-         G5, G6, G7 // reserved for kernel
-    )&gt;;
-</pre>
-</div>
-
-<p>
-Using <tt>SparcRegisterInfo.td</tt> with TableGen generates several output files
-that are intended for inclusion in other source code that you write.
-<tt>SparcRegisterInfo.td</tt> generates <tt>SparcGenRegisterInfo.h.inc</tt>,
-which should be included in the header file for the implementation of the SPARC
-register implementation that you write (<tt>SparcRegisterInfo.h</tt>). In
-<tt>SparcGenRegisterInfo.h.inc</tt> a new structure is defined called
-<tt>SparcGenRegisterInfo</tt> that uses <tt>TargetRegisterInfo</tt> as its
-base. It also specifies types, based upon the defined register
-classes: <tt>DFPRegsClass</tt>, <tt>FPRegsClass</tt>, and <tt>IntRegsClass</tt>.
-</p>
-
-<p>
-<tt>SparcRegisterInfo.td</tt> also generates <tt>SparcGenRegisterInfo.inc</tt>,
-which is included at the bottom of <tt>SparcRegisterInfo.cpp</tt>, the SPARC
-register implementation. The code below shows only the generated integer
-registers and associated register classes. The order of registers
-in <tt>IntRegs</tt> reflects the order in the definition of <tt>IntRegs</tt> in
-the target description file.
-</p>
-
-<div class="doc_code">
-<pre>  // IntRegs Register Class...
-  static const unsigned IntRegs[] = {
-    SP::L0, SP::L1, SP::L2, SP::L3, SP::L4, SP::L5,
-    SP::L6, SP::L7, SP::I0, SP::I1, SP::I2, SP::I3,
-    SP::I4, SP::I5, SP::O0, SP::O1, SP::O2, SP::O3,
-    SP::O4, SP::O5, SP::O7, SP::G1, SP::G2, SP::G3,
-    SP::G4, SP::O6, SP::I6, SP::I7, SP::G0, SP::G5,
-    SP::G6, SP::G7, 
-  };
-
-  // IntRegsVTs Register Class Value Types...
-  static const MVT::ValueType IntRegsVTs[] = {
-    MVT::i32, MVT::Other
-  };
-
-namespace SP {   // Register class instances
-  DFPRegsClass&nbsp;&nbsp;&nbsp; DFPRegsRegClass;
-  FPRegsClass&nbsp;&nbsp;&nbsp;&nbsp; FPRegsRegClass;
-  IntRegsClass&nbsp;&nbsp;&nbsp; IntRegsRegClass;
-...
-  // IntRegs Sub-register Classess...
-  static const TargetRegisterClass* const IntRegsSubRegClasses [] = {
-    NULL
-  };
-...
-  // IntRegs Super-register Classess...
-  static const TargetRegisterClass* const IntRegsSuperRegClasses [] = {
-    NULL
-  };
-...
-  // IntRegs Register Class sub-classes...
-  static const TargetRegisterClass* const IntRegsSubclasses [] = {
-    NULL
-  };
-...
-  // IntRegs Register Class super-classes...
-  static const TargetRegisterClass* const IntRegsSuperclasses [] = {
-    NULL
-  };
-
-  IntRegsClass::IntRegsClass() : TargetRegisterClass(IntRegsRegClassID, 
-    IntRegsVTs, IntRegsSubclasses, IntRegsSuperclasses, IntRegsSubRegClasses, 
-    IntRegsSuperRegClasses, 4, 4, 1, IntRegs, IntRegs + 32) {}
-}
-</pre>
-</div>
-
-<p>
-The register allocators will avoid using reserved registers, and callee saved
-registers are not used until all the volatile registers have been used.  That
-is usually good enough, but in some cases it may be necessary to provide custom
-allocation orders.
-</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="implementRegister">Implement a subclass of</a> 
-  <a href="CodeGenerator.html#targetregisterinfo">TargetRegisterInfo</a>
-</h3>
-
-<div>
-
-<p>
-The final step is to hand code portions of <tt>XXXRegisterInfo</tt>, which
-implements the interface described in <tt>TargetRegisterInfo.h</tt>. These
-functions return <tt>0</tt>, <tt>NULL</tt>, or <tt>false</tt>, unless
-overridden. Here is a list of functions that are overridden for the SPARC
-implementation in <tt>SparcRegisterInfo.cpp</tt>:
-</p>
-
-<ul>
-<li><tt>getCalleeSavedRegs</tt> &mdash; Returns a list of callee-saved registers
-    in the order of the desired callee-save stack frame offset.</li>
-
-<li><tt>getReservedRegs</tt> &mdash; Returns a bitset indexed by physical
-    register numbers, indicating if a particular register is unavailable.</li>
-
-<li><tt>hasFP</tt> &mdash; Return a Boolean indicating if a function should have
-    a dedicated frame pointer register.</li>
-
-<li><tt>eliminateCallFramePseudoInstr</tt> &mdash; If call frame setup or
-    destroy pseudo instructions are used, this can be called to eliminate
-    them.</li>
-
-<li><tt>eliminateFrameIndex</tt> &mdash; Eliminate abstract frame indices from
-    instructions that may use them.</li>
-
-<li><tt>emitPrologue</tt> &mdash; Insert prologue code into the function.</li>
-
-<li><tt>emitEpilogue</tt> &mdash; Insert epilogue code into the function.</li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="InstructionSet">Instruction Set</a>
-</h2>
-
-<!-- *********************************************************************** -->
-<div>
-
-<p>
-During the early stages of code generation, the LLVM IR code is converted to a
-<tt>SelectionDAG</tt> with nodes that are instances of the <tt>SDNode</tt> class
-containing target instructions. An <tt>SDNode</tt> has an opcode, operands, type
-requirements, and operation properties. For example, is an operation
-commutative, does an operation load from memory. The various operation node
-types are described in the <tt>include/llvm/CodeGen/SelectionDAGNodes.h</tt>
-file (values of the <tt>NodeType</tt> enum in the <tt>ISD</tt> namespace).
-</p>
-
-<p>
-TableGen uses the following target description (<tt>.td</tt>) input files to
-generate much of the code for instruction definition:
-</p>
-
-<ul>
-<li><tt>Target.td</tt> &mdash; Where the <tt>Instruction</tt>, <tt>Operand</tt>,
-    <tt>InstrInfo</tt>, and other fundamental classes are defined.</li>
-
-<li><tt>TargetSelectionDAG.td</tt>&mdash; Used by <tt>SelectionDAG</tt>
-    instruction selection generators, contains <tt>SDTC*</tt> classes (selection
-    DAG type constraint), definitions of <tt>SelectionDAG</tt> nodes (such as
-    <tt>imm</tt>, <tt>cond</tt>, <tt>bb</tt>, <tt>add</tt>, <tt>fadd</tt>,
-    <tt>sub</tt>), and pattern support (<tt>Pattern</tt>, <tt>Pat</tt>,
-    <tt>PatFrag</tt>, <tt>PatLeaf</tt>, <tt>ComplexPattern</tt>.</li>
-
-<li><tt>XXXInstrFormats.td</tt> &mdash; Patterns for definitions of
-    target-specific instructions.</li>
-
-<li><tt>XXXInstrInfo.td</tt> &mdash; Target-specific definitions of instruction
-    templates, condition codes, and instructions of an instruction set. For
-    architecture modifications, a different file name may be used. For example,
-    for Pentium with SSE instruction, this file is <tt>X86InstrSSE.td</tt>, and
-    for Pentium with MMX, this file is <tt>X86InstrMMX.td</tt>.</li>
-</ul>
-
-<p>
-There is also a target-specific <tt>XXX.td</tt> file, where <tt>XXX</tt> is the
-name of the target. The <tt>XXX.td</tt> file includes the other <tt>.td</tt>
-input files, but its contents are only directly important for subtargets.
-</p>
-
-<p>
-You should describe a concrete target-specific class <tt>XXXInstrInfo</tt> that
-represents machine instructions supported by a target machine.
-<tt>XXXInstrInfo</tt> contains an array of <tt>XXXInstrDescriptor</tt> objects,
-each of which describes one instruction. An instruction descriptor defines:</p>
-
-<ul>
-<li>Opcode mnemonic</li>
-
-<li>Number of operands</li>
-
-<li>List of implicit register definitions and uses</li>
-
-<li>Target-independent properties (such as memory access, is commutable)</li>
-
-<li>Target-specific flags </li>
-</ul>
-
-<p>
-The Instruction class (defined in <tt>Target.td</tt>) is mostly used as a base
-for more complex instruction classes.
-</p>
-
-<div class="doc_code">
-<pre>class Instruction {
-  string Namespace = "";
-  dag OutOperandList;       // An dag containing the MI def operand list.
-  dag InOperandList;        // An dag containing the MI use operand list.
-  string AsmString = "";    // The .s format to print the instruction with.
-  list&lt;dag&gt; Pattern;  // Set to the DAG pattern for this instruction
-  list&lt;Register&gt; Uses = []; 
-  list&lt;Register&gt; Defs = [];
-  list&lt;Predicate&gt; Predicates = [];  // predicates turned into isel match code
-  ... remainder not shown for space ...
-}
-</pre>
-</div>
-
-<p>
-A <tt>SelectionDAG</tt> node (<tt>SDNode</tt>) should contain an object
-representing a target-specific instruction that is defined
-in <tt>XXXInstrInfo.td</tt>. The instruction objects should represent
-instructions from the architecture manual of the target machine (such as the
-SPARC Architecture Manual for the SPARC target).
-</p>
-
-<p>
-A single instruction from the architecture manual is often modeled as multiple
-target instructions, depending upon its operands. For example, a manual might
-describe an add instruction that takes a register or an immediate operand. An
-LLVM target could model this with two instructions named <tt>ADDri</tt> and
-<tt>ADDrr</tt>.
-</p>
-
-<p>
-You should define a class for each instruction category and define each opcode
-as a subclass of the category with appropriate parameters such as the fixed
-binary encoding of opcodes and extended opcodes. You should map the register
-bits to the bits of the instruction in which they are encoded (for the
-JIT). Also you should specify how the instruction should be printed when the
-automatic assembly printer is used.
-</p>
-
-<p>
-As is described in the SPARC Architecture Manual, Version 8, there are three
-major 32-bit formats for instructions. Format 1 is only for the <tt>CALL</tt>
-instruction. Format 2 is for branch on condition codes and <tt>SETHI</tt> (set
-high bits of a register) instructions.  Format 3 is for other instructions.
-</p>
-
-<p>
-Each of these formats has corresponding classes in <tt>SparcInstrFormat.td</tt>.
-<tt>InstSP</tt> is a base class for other instruction classes. Additional base
-classes are specified for more precise formats: for example
-in <tt>SparcInstrFormat.td</tt>, <tt>F2_1</tt> is for <tt>SETHI</tt>,
-and <tt>F2_2</tt> is for branches. There are three other base
-classes: <tt>F3_1</tt> for register/register operations, <tt>F3_2</tt> for
-register/immediate operations, and <tt>F3_3</tt> for floating-point
-operations. <tt>SparcInstrInfo.td</tt> also adds the base class Pseudo for
-synthetic SPARC instructions.
-</p>
-
-<p>
-<tt>SparcInstrInfo.td</tt> largely consists of operand and instruction
-definitions for the SPARC target. In <tt>SparcInstrInfo.td</tt>, the following
-target description file entry, <tt>LDrr</tt>, defines the Load Integer
-instruction for a Word (the <tt>LD</tt> SPARC opcode) from a memory address to a
-register. The first parameter, the value 3 (<tt>11<sub>2</sub></tt>), is the
-operation value for this category of operation. The second parameter
-(<tt>000000<sub>2</sub></tt>) is the specific operation value
-for <tt>LD</tt>/Load Word. The third parameter is the output destination, which
-is a register operand and defined in the <tt>Register</tt> target description
-file (<tt>IntRegs</tt>).
-</p>
-
-<div class="doc_code">
-<pre>def LDrr : F3_1 &lt;3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr),
-                 "ld [$addr], $dst",
-                 [(set IntRegs:$dst, (load ADDRrr:$addr))]&gt;;
-</pre>
-</div>
-
-<p>
-The fourth parameter is the input source, which uses the address
-operand <tt>MEMrr</tt> that is defined earlier in <tt>SparcInstrInfo.td</tt>:
-</p>
-
-<div class="doc_code">
-<pre>def MEMrr : Operand&lt;i32&gt; {
-  let PrintMethod = "printMemOperand";
-  let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-</pre>
-</div>
-
-<p>
-The fifth parameter is a string that is used by the assembly printer and can be
-left as an empty string until the assembly printer interface is implemented. The
-sixth and final parameter is the pattern used to match the instruction during
-the SelectionDAG Select Phase described in
-(<a href="CodeGenerator.html">The LLVM
-Target-Independent Code Generator</a>).  This parameter is detailed in the next
-section, <a href="#InstructionSelector">Instruction Selector</a>.
-</p>
-
-<p>
-Instruction class definitions are not overloaded for different operand types, so
-separate versions of instructions are needed for register, memory, or immediate
-value operands. For example, to perform a Load Integer instruction for a Word
-from an immediate operand to a register, the following instruction class is
-defined:
-</p>
-
-<div class="doc_code">
-<pre>def LDri : F3_2 &lt;3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr),
-                 "ld [$addr], $dst",
-                 [(set IntRegs:$dst, (load ADDRri:$addr))]&gt;;
-</pre>
-</div>
-
-<p>
-Writing these definitions for so many similar instructions can involve a lot of
-cut and paste. In td files, the <tt>multiclass</tt> directive enables the
-creation of templates to define several instruction classes at once (using
-the <tt>defm</tt> directive). For example in <tt>SparcInstrInfo.td</tt>, the
-<tt>multiclass</tt> pattern <tt>F3_12</tt> is defined to create 2 instruction
-classes each time <tt>F3_12</tt> is invoked:
-</p>
-
-<div class="doc_code">
-<pre>multiclass F3_12 &lt;string OpcStr, bits&lt;6&gt; Op3Val, SDNode OpNode&gt; {
-  def rr  : F3_1 &lt;2, Op3Val, 
-                 (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
-                 !strconcat(OpcStr, " $b, $c, $dst"),
-                 [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]&gt;;
-  def ri  : F3_2 &lt;2, Op3Val,
-                 (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
-                 !strconcat(OpcStr, " $b, $c, $dst"),
-                 [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]&gt;;
-}
-</pre>
-</div>
-
-<p>
-So when the <tt>defm</tt> directive is used for the <tt>XOR</tt>
-and <tt>ADD</tt> instructions, as seen below, it creates four instruction
-objects: <tt>XORrr</tt>, <tt>XORri</tt>, <tt>ADDrr</tt>, and <tt>ADDri</tt>.
-</p>
-
-<div class="doc_code">
-<pre>
-defm XOR   : F3_12&lt;"xor", 0b000011, xor&gt;;
-defm ADD   : F3_12&lt;"add", 0b000000, add&gt;;
-</pre>
-</div>
-
-<p>
-<tt>SparcInstrInfo.td</tt> also includes definitions for condition codes that
-are referenced by branch instructions. The following definitions
-in <tt>SparcInstrInfo.td</tt> indicate the bit location of the SPARC condition
-code. For example, the 10<sup>th</sup> bit represents the 'greater than'
-condition for integers, and the 22<sup>nd</sup> bit represents the 'greater
-than' condition for floats.
-</p>
-
-<div class="doc_code">
-<pre>
-def ICC_NE  : ICC_VAL&lt; 9&gt;;  // Not Equal
-def ICC_E   : ICC_VAL&lt; 1&gt;;  // Equal
-def ICC_G   : ICC_VAL&lt;10&gt;;  // Greater
-...
-def FCC_U   : FCC_VAL&lt;23&gt;;  // Unordered
-def FCC_G   : FCC_VAL&lt;22&gt;;  // Greater
-def FCC_UG  : FCC_VAL&lt;21&gt;;  // Unordered or Greater
-...
-</pre>
-</div>
-
-<p>
-(Note that <tt>Sparc.h</tt> also defines enums that correspond to the same SPARC
-condition codes. Care must be taken to ensure the values in <tt>Sparc.h</tt>
-correspond to the values in <tt>SparcInstrInfo.td</tt>. I.e.,
-<tt>SPCC::ICC_NE = 9</tt>, <tt>SPCC::FCC_U = 23</tt> and so on.)
-</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="operandMapping">Instruction Operand Mapping</a>
-</h3>
-
-<div>
-
-<p>
-The code generator backend maps instruction operands to fields in the
-instruction.  Operands are assigned to unbound fields in the instruction in the
-order they are defined. Fields are bound when they are assigned a value.  For
-example, the Sparc target defines the <tt>XNORrr</tt> instruction as
-a <tt>F3_1</tt> format instruction having three operands.
-</p>
-
-<div class="doc_code">
-<pre>
-def XNORrr  : F3_1&lt;2, 0b000111,
-                   (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
-                   "xnor $b, $c, $dst",
-                   [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]&gt;;
-</pre>
-</div>
-
-<p>
-The instruction templates in <tt>SparcInstrFormats.td</tt> show the base class
-for <tt>F3_1</tt> is <tt>InstSP</tt>.
-</p>
-
-<div class="doc_code">
-<pre>
-class InstSP&lt;dag outs, dag ins, string asmstr, list&lt;dag&gt; pattern&gt; : Instruction {
-  field bits&lt;32&gt; Inst;
-  let Namespace = "SP";
-  bits&lt;2&gt; op;
-  let Inst{31-30} = op;       
-  dag OutOperandList = outs;
-  dag InOperandList = ins;
-  let AsmString   = asmstr;
-  let Pattern = pattern;
-}
-</pre>
-</div>
-
-<p><tt>InstSP</tt> leaves the <tt>op</tt> field unbound.</p>
-
-<div class="doc_code">
-<pre>
-class F3&lt;dag outs, dag ins, string asmstr, list&lt;dag&gt; pattern&gt;
-    : InstSP&lt;outs, ins, asmstr, pattern&gt; {
-  bits&lt;5&gt; rd;
-  bits&lt;6&gt; op3;
-  bits&lt;5&gt; rs1;
-  let op{1} = 1;   // Op = 2 or 3
-  let Inst{29-25} = rd;
-  let Inst{24-19} = op3;
-  let Inst{18-14} = rs1;
-}
-</pre>
-</div>
-
-<p>
-<tt>F3</tt> binds the <tt>op</tt> field and defines the <tt>rd</tt>,
-<tt>op3</tt>, and <tt>rs1</tt> fields.  <tt>F3</tt> format instructions will
-bind the operands <tt>rd</tt>, <tt>op3</tt>, and <tt>rs1</tt> fields.
-</p>
-
-<div class="doc_code">
-<pre>
-class F3_1&lt;bits&lt;2&gt; opVal, bits&lt;6&gt; op3val, dag outs, dag ins,
-           string asmstr, list&lt;dag&gt; pattern&gt; : F3&lt;outs, ins, asmstr, pattern&gt; {
-  bits&lt;8&gt; asi = 0; // asi not currently used
-  bits&lt;5&gt; rs2;
-  let op         = opVal;
-  let op3        = op3val;
-  let Inst{13}   = 0;     // i field = 0
-  let Inst{12-5} = asi;   // address space identifier
-  let Inst{4-0}  = rs2;
-}
-</pre>
-</div>
-
-<p>
-<tt>F3_1</tt> binds the <tt>op3</tt> field and defines the <tt>rs2</tt>
-fields.  <tt>F3_1</tt> format instructions will bind the operands to the <tt>rd</tt>,
-<tt>rs1</tt>, and <tt>rs2</tt> fields. This results in the <tt>XNORrr</tt>
-instruction binding <tt>$dst</tt>, <tt>$b</tt>, and <tt>$c</tt> operands to
-the <tt>rd</tt>, <tt>rs1</tt>, and <tt>rs2</tt> fields respectively.
-</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="relationMapping">Instruction Relation Mapping</a>
-</h3>
-
-<div>
-
-<p>
-This TableGen feature is used to relate instructions with each other. It is
-particularly useful when you have multiple instruction formats and need to
-switch between them after instruction selection. This entire feature is driven
-by relation models which can be defined in <tt>XXXInstrInfo.td</tt> files
-according to the target-specific instruction set. Relation models are defined
-using <tt>InstrMapping</tt> class as a base. TableGen parses all the models
-and generates instruction relation maps using the specified information.
-Relation maps are emitted as tables in the <tt>XXXGenInstrInfo.inc</tt> file
-along with the functions to query them. For the detailed information on how to
-use this feature, please refer to
-<a href="HowToUseInstrMappings.html">How to add Instruction Mappings</a>
-document.
-</p>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="implementInstr">Implement a subclass of </a>
-  <a href="CodeGenerator.html#targetinstrinfo">TargetInstrInfo</a>
-</h3>
-
-<div>
-
-<p>
-The final step is to hand code portions of <tt>XXXInstrInfo</tt>, which
-implements the interface described in <tt>TargetInstrInfo.h</tt>. These
-functions return <tt>0</tt> or a Boolean or they assert, unless
-overridden. Here's a list of functions that are overridden for the SPARC
-implementation in <tt>SparcInstrInfo.cpp</tt>:
-</p>
-
-<ul>
-<li><tt>isLoadFromStackSlot</tt> &mdash; If the specified machine instruction is
-    a direct load from a stack slot, return the register number of the
-    destination and the <tt>FrameIndex</tt> of the stack slot.</li>
-
-<li><tt>isStoreToStackSlot</tt> &mdash; If the specified machine instruction is
-    a direct store to a stack slot, return the register number of the
-    destination and the <tt>FrameIndex</tt> of the stack slot.</li>
-
-<li><tt>copyPhysReg</tt> &mdash; Copy values between a pair of physical
-    registers.</li>
-
-<li><tt>storeRegToStackSlot</tt> &mdash; Store a register value to a stack
-    slot.</li>
-
-<li><tt>loadRegFromStackSlot</tt> &mdash; Load a register value from a stack
-    slot.</li>
-
-<li><tt>storeRegToAddr</tt> &mdash; Store a register value to memory.</li>
-
-<li><tt>loadRegFromAddr</tt> &mdash; Load a register value from memory.</li>
-
-<li><tt>foldMemoryOperand</tt> &mdash; Attempt to combine instructions of any
-    load or store instruction for the specified operand(s).</li>
-</ul>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="branchFolding">Branch Folding and If Conversion</a>
-</h3>
-<div>
-
-<p>
-Performance can be improved by combining instructions or by eliminating
-instructions that are never reached. The <tt>AnalyzeBranch</tt> method
-in <tt>XXXInstrInfo</tt> may be implemented to examine conditional instructions
-and remove unnecessary instructions. <tt>AnalyzeBranch</tt> looks at the end of
-a machine basic block (MBB) for opportunities for improvement, such as branch
-folding and if conversion. The <tt>BranchFolder</tt> and <tt>IfConverter</tt>
-machine function passes (see the source files <tt>BranchFolding.cpp</tt> and
-<tt>IfConversion.cpp</tt> in the <tt>lib/CodeGen</tt> directory) call
-<tt>AnalyzeBranch</tt> to improve the control flow graph that represents the
-instructions.
-</p>
-
-<p>
-Several implementations of <tt>AnalyzeBranch</tt> (for ARM, Alpha, and X86) can
-be examined as models for your own <tt>AnalyzeBranch</tt> implementation. Since
-SPARC does not implement a useful <tt>AnalyzeBranch</tt>, the ARM target
-implementation is shown below.
-</p>
-
-<p><tt>AnalyzeBranch</tt> returns a Boolean value and takes four parameters:</p>
-
-<ul>
-<li><tt>MachineBasicBlock &amp;MBB</tt> &mdash; The incoming block to be
-    examined.</li>
-
-<li><tt>MachineBasicBlock *&amp;TBB</tt> &mdash; A destination block that is
-    returned. For a conditional branch that evaluates to true, <tt>TBB</tt> is
-    the destination.</li>
-
-<li><tt>MachineBasicBlock *&amp;FBB</tt> &mdash; For a conditional branch that
-    evaluates to false, <tt>FBB</tt> is returned as the destination.</li>
-
-<li><tt>std::vector&lt;MachineOperand&gt; &amp;Cond</tt> &mdash; List of
-    operands to evaluate a condition for a conditional branch.</li>
-</ul>
-
-<p>
-In the simplest case, if a block ends without a branch, then it falls through to
-the successor block. No destination blocks are specified for either <tt>TBB</tt>
-or <tt>FBB</tt>, so both parameters return <tt>NULL</tt>. The start of
-the <tt>AnalyzeBranch</tt> (see code below for the ARM target) shows the
-function parameters and the code for the simplest case.
-</p>
-
-<div class="doc_code">
-<pre>bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &amp;MBB,
-        MachineBasicBlock *&amp;TBB, MachineBasicBlock *&amp;FBB,
-        std::vector&lt;MachineOperand&gt; &amp;Cond) const
-{
-  MachineBasicBlock::iterator I = MBB.end();
-  if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
-    return false;
-</pre>
-</div>
-
-<p>
-If a block ends with a single unconditional branch instruction, then
-<tt>AnalyzeBranch</tt> (shown below) should return the destination of that
-branch in the <tt>TBB</tt> parameter.
-</p>
-
-<div class="doc_code">
-<pre>
-  if (LastOpc == ARM::B || LastOpc == ARM::tB) {
-    TBB = LastInst-&gt;getOperand(0).getMBB();
-    return false;
-  }
-</pre>
-</div>
-
-<p>
-If a block ends with two unconditional branches, then the second branch is never
-reached. In that situation, as shown below, remove the last branch instruction
-and return the penultimate branch in the <tt>TBB</tt> parameter.
-</p>
-
-<div class="doc_code">
-<pre>
-  if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) &amp;&amp;
-      (LastOpc == ARM::B || LastOpc == ARM::tB)) {
-    TBB = SecondLastInst-&gt;getOperand(0).getMBB();
-    I = LastInst;
-    I-&gt;eraseFromParent();
-    return false;
-  }
-</pre>
-</div>
-
-<p>
-A block may end with a single conditional branch instruction that falls through
-to successor block if the condition evaluates to false. In that case,
-<tt>AnalyzeBranch</tt> (shown below) should return the destination of that
-conditional branch in the <tt>TBB</tt> parameter and a list of operands in
-the <tt>Cond</tt> parameter to evaluate the condition.
-</p>
-
-<div class="doc_code">
-<pre>
-  if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
-    // Block ends with fall-through condbranch.
-    TBB = LastInst-&gt;getOperand(0).getMBB();
-    Cond.push_back(LastInst-&gt;getOperand(1));
-    Cond.push_back(LastInst-&gt;getOperand(2));
-    return false;
-  }
-</pre>
-</div>
-
-<p>
-If a block ends with both a conditional branch and an ensuing unconditional
-branch, then <tt>AnalyzeBranch</tt> (shown below) should return the conditional
-branch destination (assuming it corresponds to a conditional evaluation of
-'<tt>true</tt>') in the <tt>TBB</tt> parameter and the unconditional branch
-destination in the <tt>FBB</tt> (corresponding to a conditional evaluation of
-'<tt>false</tt>').  A list of operands to evaluate the condition should be
-returned in the <tt>Cond</tt> parameter.
-</p>
-
-<div class="doc_code">
-<pre>
-  unsigned SecondLastOpc = SecondLastInst-&gt;getOpcode();
-
-  if ((SecondLastOpc == ARM::Bcc &amp;&amp; LastOpc == ARM::B) ||
-      (SecondLastOpc == ARM::tBcc &amp;&amp; LastOpc == ARM::tB)) {
-    TBB =  SecondLastInst-&gt;getOperand(0).getMBB();
-    Cond.push_back(SecondLastInst-&gt;getOperand(1));
-    Cond.push_back(SecondLastInst-&gt;getOperand(2));
-    FBB = LastInst-&gt;getOperand(0).getMBB();
-    return false;
-  }
-</pre>
-</div>
-
-<p>
-For the last two cases (ending with a single conditional branch or ending with
-one conditional and one unconditional branch), the operands returned in
-the <tt>Cond</tt> parameter can be passed to methods of other instructions to
-create new branches or perform other operations. An implementation
-of <tt>AnalyzeBranch</tt> requires the helper methods <tt>RemoveBranch</tt>
-and <tt>InsertBranch</tt> to manage subsequent operations.
-</p>
-
-<p>
-<tt>AnalyzeBranch</tt> should return false indicating success in most circumstances.
-<tt>AnalyzeBranch</tt> should only return true when the method is stumped about what to
-do, for example, if a block has three terminating branches. <tt>AnalyzeBranch</tt> may
-return true if it encounters a terminator it cannot handle, such as an indirect
-branch.
-</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="InstructionSelector">Instruction Selector</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-LLVM uses a <tt>SelectionDAG</tt> to represent LLVM IR instructions, and nodes
-of the <tt>SelectionDAG</tt> ideally represent native target
-instructions. During code generation, instruction selection passes are performed
-to convert non-native DAG instructions into native target-specific
-instructions. The pass described in <tt>XXXISelDAGToDAG.cpp</tt> is used to
-match patterns and perform DAG-to-DAG instruction selection. Optionally, a pass
-may be defined (in <tt>XXXBranchSelector.cpp</tt>) to perform similar DAG-to-DAG
-operations for branch instructions. Later, the code in
-<tt>XXXISelLowering.cpp</tt> replaces or removes operations and data types not
-supported natively (legalizes) in a <tt>SelectionDAG</tt>.
-</p>
-
-<p>
-TableGen generates code for instruction selection using the following target
-description input files:
-</p>
-
-<ul>
-<li><tt>XXXInstrInfo.td</tt> &mdash; Contains definitions of instructions in a
-    target-specific instruction set, generates <tt>XXXGenDAGISel.inc</tt>, which
-    is included in <tt>XXXISelDAGToDAG.cpp</tt>.</li>
-
-<li><tt>XXXCallingConv.td</tt> &mdash; Contains the calling and return value
-    conventions for the target architecture, and it generates
-    <tt>XXXGenCallingConv.inc</tt>, which is included in
-    <tt>XXXISelLowering.cpp</tt>.</li>
-</ul>
-
-<p>
-The implementation of an instruction selection pass must include a header that
-declares the <tt>FunctionPass</tt> class or a subclass of <tt>FunctionPass</tt>. In
-<tt>XXXTargetMachine.cpp</tt>, a Pass Manager (PM) should add each instruction
-selection pass into the queue of passes to run.
-</p>
-
-<p>
-The LLVM static compiler (<tt>llc</tt>) is an excellent tool for visualizing the
-contents of DAGs. To display the <tt>SelectionDAG</tt> before or after specific
-processing phases, use the command line options for <tt>llc</tt>, described
-at <a href="CodeGenerator.html#selectiondag_process">
-SelectionDAG Instruction Selection Process</a>.
-</p>
-
-<p>
-To describe instruction selector behavior, you should add patterns for lowering
-LLVM code into a <tt>SelectionDAG</tt> as the last parameter of the instruction
-definitions in <tt>XXXInstrInfo.td</tt>. For example, in
-<tt>SparcInstrInfo.td</tt>, this entry defines a register store operation, and
-the last parameter describes a pattern with the store DAG operator.
-</p>
-
-<div class="doc_code">
-<pre>
-def STrr  : F3_1&lt; 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src),
-                 "st $src, [$addr]", [(store IntRegs:$src, ADDRrr:$addr)]&gt;;
-</pre>
-</div>
-
-<p>
-<tt>ADDRrr</tt> is a memory mode that is also defined in
-<tt>SparcInstrInfo.td</tt>:
-</p>
-
-<div class="doc_code">
-<pre>
-def ADDRrr : ComplexPattern&lt;i32, 2, "SelectADDRrr", [], []&gt;;
-</pre>
-</div>
-
-<p>
-The definition of <tt>ADDRrr</tt> refers to <tt>SelectADDRrr</tt>, which is a
-function defined in an implementation of the Instructor Selector (such
-as <tt>SparcISelDAGToDAG.cpp</tt>).
-</p>
-
-<p>
-In <tt>lib/Target/TargetSelectionDAG.td</tt>, the DAG operator for store is
-defined below:
-</p>
-
-<div class="doc_code">
-<pre>
-def store : PatFrag&lt;(ops node:$val, node:$ptr),
-                    (st node:$val, node:$ptr), [{
-  if (StoreSDNode *ST = dyn_cast&lt;StoreSDNode&gt;(N))
-    return !ST-&gt;isTruncatingStore() &amp;&amp; 
-           ST-&gt;getAddressingMode() == ISD::UNINDEXED;
-  return false;
-}]&gt;;
-</pre>
-</div>
-
-<p>
-<tt>XXXInstrInfo.td</tt> also generates (in <tt>XXXGenDAGISel.inc</tt>) the
-<tt>SelectCode</tt> method that is used to call the appropriate processing
-method for an instruction. In this example, <tt>SelectCode</tt>
-calls <tt>Select_ISD_STORE</tt> for the <tt>ISD::STORE</tt> opcode.
-</p>
-
-<div class="doc_code">
-<pre>
-SDNode *SelectCode(SDValue N) {
-  ... 
-  MVT::ValueType NVT = N.getNode()-&gt;getValueType(0);
-  switch (N.getOpcode()) {
-  case ISD::STORE: {
-    switch (NVT) {
-    default:
-      return Select_ISD_STORE(N);
-      break;
-    }
-    break;
-  }
-  ...
-</pre>
-</div>
-
-<p>
-The pattern for <tt>STrr</tt> is matched, so elsewhere in
-<tt>XXXGenDAGISel.inc</tt>, code for <tt>STrr</tt> is created for
-<tt>Select_ISD_STORE</tt>. The <tt>Emit_22</tt> method is also generated
-in <tt>XXXGenDAGISel.inc</tt> to complete the processing of this
-instruction.
-</p>
-
-<div class="doc_code">
-<pre>
-SDNode *Select_ISD_STORE(const SDValue &amp;N) {
-  SDValue Chain = N.getOperand(0);
-  if (Predicate_store(N.getNode())) {
-    SDValue N1 = N.getOperand(1);
-    SDValue N2 = N.getOperand(2);
-    SDValue CPTmp0;
-    SDValue CPTmp1;
-
-    // Pattern: (st:void IntRegs:i32:$src, 
-    //           ADDRrr:i32:$addr)&lt;&lt;P:Predicate_store&gt;&gt;
-    // Emits: (STrr:void ADDRrr:i32:$addr, IntRegs:i32:$src)
-    // Pattern complexity = 13  cost = 1  size = 0
-    if (SelectADDRrr(N, N2, CPTmp0, CPTmp1) &amp;&amp;
-        N1.getNode()-&gt;getValueType(0) == MVT::i32 &amp;&amp;
-        N2.getNode()-&gt;getValueType(0) == MVT::i32) {
-      return Emit_22(N, SP::STrr, CPTmp0, CPTmp1);
-    }
-...
-</pre>
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="LegalizePhase">The SelectionDAG Legalize Phase</a>
-</h3>
-
-<div>
-
-<p>
-The Legalize phase converts a DAG to use types and operations that are natively
-supported by the target. For natively unsupported types and operations, you need
-to add code to the target-specific XXXTargetLowering implementation to convert
-unsupported types and operations to supported ones.
-</p>
-
-<p>
-In the constructor for the <tt>XXXTargetLowering</tt> class, first use the
-<tt>addRegisterClass</tt> method to specify which types are supports and which
-register classes are associated with them. The code for the register classes are
-generated by TableGen from <tt>XXXRegisterInfo.td</tt> and placed
-in <tt>XXXGenRegisterInfo.h.inc</tt>. For example, the implementation of the
-constructor for the SparcTargetLowering class (in
-<tt>SparcISelLowering.cpp</tt>) starts with the following code:
-</p>
-
-<div class="doc_code">
-<pre>
-addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
-addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
-addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass); 
-</pre>
-</div>
-
-<p>
-You should examine the node types in the <tt>ISD</tt> namespace
-(<tt>include/llvm/CodeGen/SelectionDAGNodes.h</tt>) and determine which
-operations the target natively supports. For operations that do <b>not</b> have
-native support, add a callback to the constructor for the XXXTargetLowering
-class, so the instruction selection process knows what to do. The TargetLowering
-class callback methods (declared in <tt>llvm/Target/TargetLowering.h</tt>) are:
-</p>
-
-<ul>
-<li><tt>setOperationAction</tt> &mdash; General operation.</li>
-
-<li><tt>setLoadExtAction</tt> &mdash; Load with extension.</li>
-
-<li><tt>setTruncStoreAction</tt> &mdash; Truncating store.</li>
-
-<li><tt>setIndexedLoadAction</tt> &mdash; Indexed load.</li>
-
-<li><tt>setIndexedStoreAction</tt> &mdash; Indexed store.</li>
-
-<li><tt>setConvertAction</tt> &mdash; Type conversion.</li>
-
-<li><tt>setCondCodeAction</tt> &mdash; Support for a given condition code.</li>
-</ul>
-
-<p>
-Note: on older releases, <tt>setLoadXAction</tt> is used instead
-of <tt>setLoadExtAction</tt>.  Also, on older releases,
-<tt>setCondCodeAction</tt> may not be supported. Examine your release
-to see what methods are specifically supported.
-</p>
-
-<p>
-These callbacks are used to determine that an operation does or does not work
-with a specified type (or types). And in all cases, the third parameter is
-a <tt>LegalAction</tt> type enum value: <tt>Promote</tt>, <tt>Expand</tt>,
-<tt>Custom</tt>, or <tt>Legal</tt>. <tt>SparcISelLowering.cpp</tt>
-contains examples of all four <tt>LegalAction</tt> values.
-</p>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="promote">Promote</a>
-</h4>
-
-<div>
-
-<p>
-For an operation without native support for a given type, the specified type may
-be promoted to a larger type that is supported. For example, SPARC does not
-support a sign-extending load for Boolean values (<tt>i1</tt> type), so
-in <tt>SparcISelLowering.cpp</tt> the third parameter below, <tt>Promote</tt>,
-changes <tt>i1</tt> type values to a large type before loading.
-</p>
-
-<div class="doc_code">
-<pre>
-setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
-</pre>
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="expand">Expand</a>
-</h4>
-
-<div>
-
-<p>
-For a type without native support, a value may need to be broken down further,
-rather than promoted. For an operation without native support, a combination of
-other operations may be used to similar effect. In SPARC, the floating-point
-sine and cosine trig operations are supported by expansion to other operations,
-as indicated by the third parameter, <tt>Expand</tt>, to
-<tt>setOperationAction</tt>:
-</p>
-
-<div class="doc_code">
-<pre>
-setOperationAction(ISD::FSIN, MVT::f32, Expand);
-setOperationAction(ISD::FCOS, MVT::f32, Expand);
-</pre>
-</div>
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="custom">Custom</a>
-</h4>
-
-<div>
-
-<p>
-For some operations, simple type promotion or operation expansion may be
-insufficient. In some cases, a special intrinsic function must be implemented.
-</p>
-
-<p>
-For example, a constant value may require special treatment, or an operation may
-require spilling and restoring registers in the stack and working with register
-allocators.
-</p>
-
-<p>
-As seen in <tt>SparcISelLowering.cpp</tt> code below, to perform a type
-conversion from a floating point value to a signed integer, first the
-<tt>setOperationAction</tt> should be called with <tt>Custom</tt> as the third
-parameter:
-</p>
-
-<div class="doc_code">
-<pre>
-setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
-</pre>
-</div>    
-
-<p>
-In the <tt>LowerOperation</tt> method, for each <tt>Custom</tt> operation, a
-case statement should be added to indicate what function to call. In the
-following code, an <tt>FP_TO_SINT</tt> opcode will call
-the <tt>LowerFP_TO_SINT</tt> method:
-</p>
-
-<div class="doc_code">
-<pre>
-SDValue SparcTargetLowering::LowerOperation(SDValue Op, SelectionDAG &amp;DAG) {
-  switch (Op.getOpcode()) {
-  case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
-  ...
-  }
-}
-</pre>
-</div>
-
-<p>
-Finally, the <tt>LowerFP_TO_SINT</tt> method is implemented, using an FP
-register to convert the floating-point value to an integer.
-</p>
-
-<div class="doc_code">
-<pre>
-static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &amp;DAG) {
-  assert(Op.getValueType() == MVT::i32);
-  Op = DAG.getNode(SPISD::FTOI, MVT::f32, Op.getOperand(0));
-  return DAG.getNode(ISD::BITCAST, MVT::i32, Op);
-}
-</pre>
-</div>    
-
-</div>
-
-<!-- _______________________________________________________________________ -->
-<h4>
-  <a name="legal">Legal</a>
-</h4>
-
-<div>
-
-<p>
-The <tt>Legal</tt> LegalizeAction enum value simply indicates that an
-operation <b>is</b> natively supported. <tt>Legal</tt> represents the default
-condition, so it is rarely used. In <tt>SparcISelLowering.cpp</tt>, the action
-for <tt>CTPOP</tt> (an operation to count the bits set in an integer) is
-natively supported only for SPARC v9. The following code enables
-the <tt>Expand</tt> conversion technique for non-v9 SPARC implementations.
-</p>
-
-<div class="doc_code">
-<pre>
-setOperationAction(ISD::CTPOP, MVT::i32, Expand);
-...
-if (TM.getSubtarget&lt;SparcSubtarget&gt;().isV9())
-  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
-  case ISD::SETULT: return SPCC::ICC_CS;
-  case ISD::SETULE: return SPCC::ICC_LEU;
-  case ISD::SETUGT: return SPCC::ICC_GU;
-  case ISD::SETUGE: return SPCC::ICC_CC;
-  }
-}
-</pre>
-</div>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="callingConventions">Calling Conventions</a>
-</h3>
-
-<div>
-
-<p>
-To support target-specific calling conventions, <tt>XXXGenCallingConv.td</tt>
-uses interfaces (such as CCIfType and CCAssignToReg) that are defined in
-<tt>lib/Target/TargetCallingConv.td</tt>. TableGen can take the target
-descriptor file <tt>XXXGenCallingConv.td</tt> and generate the header
-file <tt>XXXGenCallingConv.inc</tt>, which is typically included
-in <tt>XXXISelLowering.cpp</tt>. You can use the interfaces in
-<tt>TargetCallingConv.td</tt> to specify:
-</p>
-
-<ul>
-<li>The order of parameter allocation.</li>
-
-<li>Where parameters and return values are placed (that is, on the stack or in
-    registers).</li>
-
-<li>Which registers may be used.</li>
-
-<li>Whether the caller or callee unwinds the stack.</li>
-</ul>
-
-<p>
-The following example demonstrates the use of the <tt>CCIfType</tt> and
-<tt>CCAssignToReg</tt> interfaces. If the <tt>CCIfType</tt> predicate is true
-(that is, if the current argument is of type <tt>f32</tt> or <tt>f64</tt>), then
-the action is performed. In this case, the <tt>CCAssignToReg</tt> action assigns
-the argument value to the first available register: either <tt>R0</tt>
-or <tt>R1</tt>.
-</p>
-
-<div class="doc_code">
-<pre>
-CCIfType&lt;[f32,f64], CCAssignToReg&lt;[R0, R1]&gt;&gt;
-</pre>
-</div>
-
-<p>
-<tt>SparcCallingConv.td</tt> contains definitions for a target-specific
-return-value calling convention (RetCC_Sparc32) and a basic 32-bit C calling
-convention (<tt>CC_Sparc32</tt>). The definition of <tt>RetCC_Sparc32</tt>
-(shown below) indicates which registers are used for specified scalar return
-types. A single-precision float is returned to register <tt>F0</tt>, and a
-double-precision float goes to register <tt>D0</tt>. A 32-bit integer is
-returned in register <tt>I0</tt> or <tt>I1</tt>.
-</p>
-
-<div class="doc_code">
-<pre>
-def RetCC_Sparc32 : CallingConv&lt;[
-  CCIfType&lt;[i32], CCAssignToReg&lt;[I0, I1]&gt;&gt;,
-  CCIfType&lt;[f32], CCAssignToReg&lt;[F0]&gt;&gt;,
-  CCIfType&lt;[f64], CCAssignToReg&lt;[D0]&gt;&gt;
-]&gt;;
-</pre>
-</div>
-
-<p>
-The definition of <tt>CC_Sparc32</tt> in <tt>SparcCallingConv.td</tt> introduces
-<tt>CCAssignToStack</tt>, which assigns the value to a stack slot with the
-specified size and alignment. In the example below, the first parameter, 4,
-indicates the size of the slot, and the second parameter, also 4, indicates the
-stack alignment along 4-byte units. (Special cases: if size is zero, then the
-ABI size is used; if alignment is zero, then the ABI alignment is used.)
-</p>
-
-<div class="doc_code">
-<pre>
-def CC_Sparc32 : CallingConv&lt;[
-  // All arguments get passed in integer registers if there is space.
-  CCIfType&lt;[i32, f32, f64], CCAssignToReg&lt;[I0, I1, I2, I3, I4, I5]&gt;&gt;,
-  CCAssignToStack&lt;4, 4&gt;
-]&gt;;
-</pre>
-</div>
-
-<p>
-<tt>CCDelegateTo</tt> is another commonly used interface, which tries to find a
-specified sub-calling convention, and, if a match is found, it is invoked. In
-the following example (in <tt>X86CallingConv.td</tt>), the definition of
-<tt>RetCC_X86_32_C</tt> ends with <tt>CCDelegateTo</tt>. After the current value
-is assigned to the register <tt>ST0</tt> or <tt>ST1</tt>,
-the <tt>RetCC_X86Common</tt> is invoked.
-</p>
-
-<div class="doc_code">
-<pre>
-def RetCC_X86_32_C : CallingConv&lt;[
-  CCIfType&lt;[f32], CCAssignToReg&lt;[ST0, ST1]&gt;&gt;,
-  CCIfType&lt;[f64], CCAssignToReg&lt;[ST0, ST1]&gt;&gt;,
-  CCDelegateTo&lt;RetCC_X86Common&gt;
-]&gt;;
-</pre>
-</div>
-
-<p>
-<tt>CCIfCC</tt> is an interface that attempts to match the given name to the
-current calling convention. If the name identifies the current calling
-convention, then a specified action is invoked. In the following example (in
-<tt>X86CallingConv.td</tt>), if the <tt>Fast</tt> calling convention is in use,
-then <tt>RetCC_X86_32_Fast</tt> is invoked. If the <tt>SSECall</tt> calling
-convention is in use, then <tt>RetCC_X86_32_SSE</tt> is invoked.
-</p>
-
-<div class="doc_code">
-<pre>
-def RetCC_X86_32 : CallingConv&lt;[
-  CCIfCC&lt;"CallingConv::Fast", CCDelegateTo&lt;RetCC_X86_32_Fast&gt;&gt;,
-  CCIfCC&lt;"CallingConv::X86_SSECall", CCDelegateTo&lt;RetCC_X86_32_SSE&gt;&gt;,
-  CCDelegateTo&lt;RetCC_X86_32_C&gt;
-]&gt;;
-</pre>
-</div>
-
-<p>Other calling convention interfaces include:</p>
-
-<ul>
-<li><tt>CCIf &lt;predicate, action&gt;</tt> &mdash; If the predicate matches,
-    apply the action.</li>
-
-<li><tt>CCIfInReg &lt;action&gt;</tt> &mdash; If the argument is marked with the
-    '<tt>inreg</tt>' attribute, then apply the action.</li>
-
-<li><tt>CCIfNest &lt;action&gt;</tt> &mdash; Inf the argument is marked with the
-    '<tt>nest</tt>' attribute, then apply the action.</li>
-
-<li><tt>CCIfNotVarArg &lt;action&gt;</tt> &mdash; If the current function does
-    not take a variable number of arguments, apply the action.</li>
-
-<li><tt>CCAssignToRegWithShadow &lt;registerList, shadowList&gt;</tt> &mdash;
-    similar to <tt>CCAssignToReg</tt>, but with a shadow list of registers.</li>
-
-<li><tt>CCPassByVal &lt;size, align&gt;</tt> &mdash; Assign value to a stack
-    slot with the minimum specified size and alignment.</li>
-
-<li><tt>CCPromoteToType &lt;type&gt;</tt> &mdash; Promote the current value to
-    the specified type.</li>
-
-<li><tt>CallingConv &lt;[actions]&gt;</tt> &mdash; Define each calling
-    convention that is supported.</li>
-</ul>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="assemblyPrinter">Assembly Printer</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-During the code emission stage, the code generator may utilize an LLVM pass to
-produce assembly output. To do this, you want to implement the code for a
-printer that converts LLVM IR to a GAS-format assembly language for your target
-machine, using the following steps:
-</p>
-
-<ul>
-<li>Define all the assembly strings for your target, adding them to the
-    instructions defined in the <tt>XXXInstrInfo.td</tt> file.
-    (See <a href="#InstructionSet">Instruction Set</a>.)  TableGen will produce
-    an output file (<tt>XXXGenAsmWriter.inc</tt>) with an implementation of
-    the <tt>printInstruction</tt> method for the XXXAsmPrinter class.</li>
-
-<li>Write <tt>XXXTargetAsmInfo.h</tt>, which contains the bare-bones declaration
-    of the <tt>XXXTargetAsmInfo</tt> class (a subclass
-    of <tt>TargetAsmInfo</tt>).</li>
-
-<li>Write <tt>XXXTargetAsmInfo.cpp</tt>, which contains target-specific values
-    for <tt>TargetAsmInfo</tt> properties and sometimes new implementations for
-    methods.</li>
-
-<li>Write <tt>XXXAsmPrinter.cpp</tt>, which implements the <tt>AsmPrinter</tt>
-    class that performs the LLVM-to-assembly conversion.</li>
-</ul>
-
-<p>
-The code in <tt>XXXTargetAsmInfo.h</tt> is usually a trivial declaration of the
-<tt>XXXTargetAsmInfo</tt> class for use in <tt>XXXTargetAsmInfo.cpp</tt>.
-Similarly, <tt>XXXTargetAsmInfo.cpp</tt> usually has a few declarations of
-<tt>XXXTargetAsmInfo</tt> replacement values that override the default values
-in <tt>TargetAsmInfo.cpp</tt>. For example in <tt>SparcTargetAsmInfo.cpp</tt>:
-</p>
-
-<div class="doc_code">
-<pre>
-SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &amp;TM) {
-  Data16bitsDirective = "\t.half\t";
-  Data32bitsDirective = "\t.word\t";
-  Data64bitsDirective = 0;  // .xword is only supported by V9.
-  ZeroDirective = "\t.skip\t";
-  CommentString = "!";
-  ConstantPoolSection = "\t.section \".rodata\",#alloc\n";
-}
-</pre>
-</div>
-
-<p>
-The X86 assembly printer implementation (<tt>X86TargetAsmInfo</tt>) is an
-example where the target specific <tt>TargetAsmInfo</tt> class uses an 
-overridden methods: <tt>ExpandInlineAsm</tt>.
-</p>
-
-<p>
-A target-specific implementation of AsmPrinter is written in
-<tt>XXXAsmPrinter.cpp</tt>, which implements the <tt>AsmPrinter</tt> class that
-converts the LLVM to printable assembly. The implementation must include the
-following headers that have declarations for the <tt>AsmPrinter</tt> and
-<tt>MachineFunctionPass</tt> classes. The <tt>MachineFunctionPass</tt> is a
-subclass of <tt>FunctionPass</tt>.
-</p>
-
-<div class="doc_code">
-<pre>
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h" 
-</pre>
-</div>
-
-<p>
-As a <tt>FunctionPass</tt>, <tt>AsmPrinter</tt> first
-calls <tt>doInitialization</tt> to set up the <tt>AsmPrinter</tt>. In
-<tt>SparcAsmPrinter</tt>, a <tt>Mangler</tt> object is instantiated to process
-variable names.
-</p>
-
-<p>
-In <tt>XXXAsmPrinter.cpp</tt>, the <tt>runOnMachineFunction</tt> method
-(declared in <tt>MachineFunctionPass</tt>) must be implemented
-for <tt>XXXAsmPrinter</tt>. In <tt>MachineFunctionPass</tt>,
-the <tt>runOnFunction</tt> method invokes <tt>runOnMachineFunction</tt>.
-Target-specific implementations of <tt>runOnMachineFunction</tt> differ, but
-generally do the following to process each machine function:
-</p>
-
-<ul>
-<li>Call <tt>SetupMachineFunction</tt> to perform initialization.</li>
-
-<li>Call <tt>EmitConstantPool</tt> to print out (to the output stream) constants
-    which have been spilled to memory.</li>
-
-<li>Call <tt>EmitJumpTableInfo</tt> to print out jump tables used by the current
-    function.</li>
-
-<li>Print out the label for the current function.</li>
-
-<li>Print out the code for the function, including basic block labels and the
-    assembly for the instruction (using <tt>printInstruction</tt>)</li>
-</ul>
-
-<p>
-The <tt>XXXAsmPrinter</tt> implementation must also include the code generated
-by TableGen that is output in the <tt>XXXGenAsmWriter.inc</tt> file. The code
-in <tt>XXXGenAsmWriter.inc</tt> contains an implementation of the
-<tt>printInstruction</tt> method that may call these methods:
-</p>
-
-<ul>
-<li><tt>printOperand</tt></li>
-
-<li><tt>printMemOperand</tt></li>
-
-<li><tt>printCCOperand (for conditional statements)</tt></li>
-
-<li><tt>printDataDirective</tt></li>
-
-<li><tt>printDeclare</tt></li>
-
-<li><tt>printImplicitDef</tt></li>
-
-<li><tt>printInlineAsm</tt></li>
-</ul>
-
-<p>
-The implementations of <tt>printDeclare</tt>, <tt>printImplicitDef</tt>,
-<tt>printInlineAsm</tt>, and <tt>printLabel</tt> in <tt>AsmPrinter.cpp</tt> are
-generally adequate for printing assembly and do not need to be
-overridden.
-</p>
-
-<p>
-The <tt>printOperand</tt> method is implemented with a long switch/case
-statement for the type of operand: register, immediate, basic block, external
-symbol, global address, constant pool index, or jump table index. For an
-instruction with a memory address operand, the <tt>printMemOperand</tt> method
-should be implemented to generate the proper output. Similarly,
-<tt>printCCOperand</tt> should be used to print a conditional operand.
-</p>
-
-<p><tt>doFinalization</tt> should be overridden in <tt>XXXAsmPrinter</tt>, and
-it should be called to shut down the assembly printer. During
-<tt>doFinalization</tt>, global variables and constants are printed to
-output.
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="subtargetSupport">Subtarget Support</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Subtarget support is used to inform the code generation process of instruction
-set variations for a given chip set.  For example, the LLVM SPARC implementation
-provided covers three major versions of the SPARC microprocessor architecture:
-Version 8 (V8, which is a 32-bit architecture), Version 9 (V9, a 64-bit
-architecture), and the UltraSPARC architecture. V8 has 16 double-precision
-floating-point registers that are also usable as either 32 single-precision or 8
-quad-precision registers.  V8 is also purely big-endian. V9 has 32
-double-precision floating-point registers that are also usable as 16
-quad-precision registers, but cannot be used as single-precision registers. The
-UltraSPARC architecture combines V9 with UltraSPARC Visual Instruction Set
-extensions.
-</p>
-
-<p>
-If subtarget support is needed, you should implement a target-specific
-XXXSubtarget class for your architecture. This class should process the
-command-line options <tt>-mcpu=</tt> and <tt>-mattr=</tt>.
-</p>
-
-<p>
-TableGen uses definitions in the <tt>Target.td</tt> and <tt>Sparc.td</tt> files
-to generate code in <tt>SparcGenSubtarget.inc</tt>. In <tt>Target.td</tt>, shown
-below, the <tt>SubtargetFeature</tt> interface is defined. The first 4 string
-parameters of the <tt>SubtargetFeature</tt> interface are a feature name, an
-attribute set by the feature, the value of the attribute, and a description of
-the feature. (The fifth parameter is a list of features whose presence is
-implied, and its default value is an empty array.)
-</p>
-
-<div class="doc_code">
-<pre>
-class SubtargetFeature&lt;string n, string a,  string v, string d,
-                       list&lt;SubtargetFeature&gt; i = []&gt; {
-  string Name = n;
-  string Attribute = a;
-  string Value = v;
-  string Desc = d;
-  list&lt;SubtargetFeature&gt; Implies = i;
-}
-</pre>
-</div>
-
-<p>
-In the <tt>Sparc.td</tt> file, the SubtargetFeature is used to define the
-following features.
-</p>
-
-<div class="doc_code">
-<pre>
-def FeatureV9 : SubtargetFeature&lt;"v9", "IsV9", "true",
-                     "Enable SPARC-V9 instructions"&gt;;
-def FeatureV8Deprecated : SubtargetFeature&lt;"deprecated-v8", 
-                     "V8DeprecatedInsts", "true",
-                     "Enable deprecated V8 instructions in V9 mode"&gt;;
-def FeatureVIS : SubtargetFeature&lt;"vis", "IsVIS", "true",
-                     "Enable UltraSPARC Visual Instruction Set extensions"&gt;;
-</pre>
-</div>
-
-<p>
-Elsewhere in <tt>Sparc.td</tt>, the Proc class is defined and then is used to
-define particular SPARC processor subtypes that may have the previously
-described features.
-</p>
-
-<div class="doc_code">
-<pre>
-class Proc&lt;string Name, list&lt;SubtargetFeature&gt; Features&gt;
-  : Processor&lt;Name, NoItineraries, Features&gt;;
-&nbsp;
-def : Proc&lt;"generic",         []&gt;;
-def : Proc&lt;"v8",              []&gt;;
-def : Proc&lt;"supersparc",      []&gt;;
-def : Proc&lt;"sparclite",       []&gt;;
-def : Proc&lt;"f934",            []&gt;;
-def : Proc&lt;"hypersparc",      []&gt;;
-def : Proc&lt;"sparclite86x",    []&gt;;
-def : Proc&lt;"sparclet",        []&gt;;
-def : Proc&lt;"tsc701",          []&gt;;
-def : Proc&lt;"v9",              [FeatureV9]&gt;;
-def : Proc&lt;"ultrasparc",      [FeatureV9, FeatureV8Deprecated]&gt;;
-def : Proc&lt;"ultrasparc3",     [FeatureV9, FeatureV8Deprecated]&gt;;
-def : Proc&lt;"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]&gt;;
-</pre>
-</div>
-
-<p>
-From <tt>Target.td</tt> and <tt>Sparc.td</tt> files, the resulting
-SparcGenSubtarget.inc specifies enum values to identify the features, arrays of
-constants to represent the CPU features and CPU subtypes, and the
-ParseSubtargetFeatures method that parses the features string that sets
-specified subtarget options. The generated <tt>SparcGenSubtarget.inc</tt> file
-should be included in the <tt>SparcSubtarget.cpp</tt>. The target-specific
-implementation of the XXXSubtarget method should follow this pseudocode:
-</p>
-
-<div class="doc_code">
-<pre>
-XXXSubtarget::XXXSubtarget(const Module &amp;M, const std::string &amp;FS) {
-  // Set the default features
-  // Determine default and user specified characteristics of the CPU
-  // Call ParseSubtargetFeatures(FS, CPU) to parse the features string
-  // Perform any additional operations
-}
-</pre>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2>
-  <a name="jitSupport">JIT Support</a>
-</h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The implementation of a target machine optionally includes a Just-In-Time (JIT)
-code generator that emits machine code and auxiliary structures as binary output
-that can be written directly to memory.  To do this, implement JIT code
-generation by performing the following steps:
-</p>
-
-<ul>
-<li>Write an <tt>XXXCodeEmitter.cpp</tt> file that contains a machine function
-    pass that transforms target-machine instructions into relocatable machine
-    code.</li>
-
-<li>Write an <tt>XXXJITInfo.cpp</tt> file that implements the JIT interfaces for
-    target-specific code-generation activities, such as emitting machine code
-    and stubs.</li>
-
-<li>Modify <tt>XXXTargetMachine</tt> so that it provides a
-    <tt>TargetJITInfo</tt> object through its <tt>getJITInfo</tt> method.</li>
-</ul>
-
-<p>
-There are several different approaches to writing the JIT support code. For
-instance, TableGen and target descriptor files may be used for creating a JIT
-code generator, but are not mandatory. For the Alpha and PowerPC target
-machines, TableGen is used to generate <tt>XXXGenCodeEmitter.inc</tt>, which
-contains the binary coding of machine instructions and the
-<tt>getBinaryCodeForInstr</tt> method to access those codes. Other JIT
-implementations do not.
-</p>
-
-<p>
-Both <tt>XXXJITInfo.cpp</tt> and <tt>XXXCodeEmitter.cpp</tt> must include the
-<tt>llvm/CodeGen/MachineCodeEmitter.h</tt> header file that defines the
-<tt>MachineCodeEmitter</tt> class containing code for several callback functions
-that write data (in bytes, words, strings, etc.) to the output stream.
-</p>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="mce">Machine Code Emitter</a>
-</h3>
-
-<div>
-
-<p>
-In <tt>XXXCodeEmitter.cpp</tt>, a target-specific of the <tt>Emitter</tt> class
-is implemented as a function pass (subclass
-of <tt>MachineFunctionPass</tt>). The target-specific implementation
-of <tt>runOnMachineFunction</tt> (invoked by
-<tt>runOnFunction</tt> in <tt>MachineFunctionPass</tt>) iterates through the
-<tt>MachineBasicBlock</tt> calls <tt>emitInstruction</tt> to process each
-instruction and emit binary code. <tt>emitInstruction</tt> is largely
-implemented with case statements on the instruction types defined in
-<tt>XXXInstrInfo.h</tt>. For example, in <tt>X86CodeEmitter.cpp</tt>,
-the <tt>emitInstruction</tt> method is built around the following switch/case
-statements:
-</p>
-
-<div class="doc_code">
-<pre>
-switch (Desc-&gt;TSFlags &amp; X86::FormMask) {
-case X86II::Pseudo:  // for not yet implemented instructions 
-   ...               // or pseudo-instructions
-   break;
-case X86II::RawFrm:  // for instructions with a fixed opcode value
-   ...
-   break;
-case X86II::AddRegFrm: // for instructions that have one register operand 
-   ...                 // added to their opcode
-   break;
-case X86II::MRMDestReg:// for instructions that use the Mod/RM byte
-   ...                 // to specify a destination (register)
-   break;
-case X86II::MRMDestMem:// for instructions that use the Mod/RM byte
-   ...                 // to specify a destination (memory)
-   break;
-case X86II::MRMSrcReg: // for instructions that use the Mod/RM byte
-   ...                 // to specify a source (register)
-   break;
-case X86II::MRMSrcMem: // for instructions that use the Mod/RM byte
-   ...                 // to specify a source (memory)
-   break;
-case X86II::MRM0r: case X86II::MRM1r:  // for instructions that operate on 
-case X86II::MRM2r: case X86II::MRM3r:  // a REGISTER r/m operand and
-case X86II::MRM4r: case X86II::MRM5r:  // use the Mod/RM byte and a field
-case X86II::MRM6r: case X86II::MRM7r:  // to hold extended opcode data
-   ...  
-   break;
-case X86II::MRM0m: case X86II::MRM1m:  // for instructions that operate on
-case X86II::MRM2m: case X86II::MRM3m:  // a MEMORY r/m operand and
-case X86II::MRM4m: case X86II::MRM5m:  // use the Mod/RM byte and a field
-case X86II::MRM6m: case X86II::MRM7m:  // to hold extended opcode data
-   ...  
-   break;
-case X86II::MRMInitReg: // for instructions whose source and
-   ...                  // destination are the same register
-   break;
-}
-</pre>
-</div>
-
-<p>
-The implementations of these case statements often first emit the opcode and
-then get the operand(s). Then depending upon the operand, helper methods may be
-called to process the operand(s). For example, in <tt>X86CodeEmitter.cpp</tt>,
-for the <tt>X86II::AddRegFrm</tt> case, the first data emitted
-(by <tt>emitByte</tt>) is the opcode added to the register operand. Then an
-object representing the machine operand, <tt>MO1</tt>, is extracted. The helper
-methods such as <tt>isImmediate</tt>,
-<tt>isGlobalAddress</tt>, <tt>isExternalSymbol</tt>, <tt>isConstantPoolIndex</tt>, and 
-<tt>isJumpTableIndex</tt> determine the operand
-type. (<tt>X86CodeEmitter.cpp</tt> also has private methods such
-as <tt>emitConstant</tt>, <tt>emitGlobalAddress</tt>,
-<tt>emitExternalSymbolAddress</tt>, <tt>emitConstPoolAddress</tt>,
-and <tt>emitJumpTableAddress</tt> that emit the data into the output stream.)
-</p>
-
-<div class="doc_code">
-<pre>
-case X86II::AddRegFrm:
-  MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
-  
-  if (CurOp != NumOps) {
-    const MachineOperand &amp;MO1 = MI.getOperand(CurOp++);
-    unsigned Size = X86InstrInfo::sizeOfImm(Desc);
-    if (MO1.isImmediate())
-      emitConstant(MO1.getImm(), Size);
-    else {
-      unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
-        : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
-      if (Opcode == X86::MOV64ri) 
-        rt = X86::reloc_absolute_dword;  // FIXME: add X86II flag?
-      if (MO1.isGlobalAddress()) {
-        bool NeedStub = isa&lt;Function&gt;(MO1.getGlobal());
-        bool isLazy = gvNeedsLazyPtr(MO1.getGlobal());
-        emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
-                          NeedStub, isLazy);
-      } else if (MO1.isExternalSymbol())
-        emitExternalSymbolAddress(MO1.getSymbolName(), rt);
-      else if (MO1.isConstantPoolIndex())
-        emitConstPoolAddress(MO1.getIndex(), rt);
-      else if (MO1.isJumpTableIndex())
-        emitJumpTableAddress(MO1.getIndex(), rt);
-    }
-  }
-  break;
-</pre>
-</div>
-
-<p>
-In the previous example, <tt>XXXCodeEmitter.cpp</tt> uses the
-variable <tt>rt</tt>, which is a RelocationType enum that may be used to
-relocate addresses (for example, a global address with a PIC base offset). The
-<tt>RelocationType</tt> enum for that target is defined in the short
-target-specific <tt>XXXRelocations.h</tt> file. The <tt>RelocationType</tt> is used by
-the <tt>relocate</tt> method defined in <tt>XXXJITInfo.cpp</tt> to rewrite
-addresses for referenced global symbols.
-</p>
-
-<p>
-For example, <tt>X86Relocations.h</tt> specifies the following relocation types
-for the X86 addresses. In all four cases, the relocated value is added to the
-value already in memory. For <tt>reloc_pcrel_word</tt>
-and <tt>reloc_picrel_word</tt>, there is an additional initial adjustment.
-</p>
-
-<div class="doc_code">
-<pre>
-enum RelocationType {
-  reloc_pcrel_word = 0,    // add reloc value after adjusting for the PC loc
-  reloc_picrel_word = 1,   // add reloc value after adjusting for the PIC base
-  reloc_absolute_word = 2, // absolute relocation; no additional adjustment 
-  reloc_absolute_dword = 3 // absolute relocation; no additional adjustment
-};
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
-  <a name="targetJITInfo">Target JIT Info</a>
-</h3>
-
-<div>
-
-<p>
-<tt>XXXJITInfo.cpp</tt> implements the JIT interfaces for target-specific
-code-generation activities, such as emitting machine code and stubs. At minimum,
-a target-specific version of <tt>XXXJITInfo</tt> implements the following:
-</p>
-
-<ul>
-<li><tt>getLazyResolverFunction</tt> &mdash; Initializes the JIT, gives the
-    target a function that is used for compilation.</li>
-
-<li><tt>emitFunctionStub</tt> &mdash; Returns a native function with a specified
-    address for a callback function.</li>
-
-<li><tt>relocate</tt> &mdash; Changes the addresses of referenced globals, based
-    on relocation types.</li>
-
-<li>Callback function that are wrappers to a function stub that is used when the
-    real target is not initially known.</li>
-</ul>
-
-<p>
-<tt>getLazyResolverFunction</tt> is generally trivial to implement. It makes the
-incoming parameter as the global <tt>JITCompilerFunction</tt> and returns the
-callback function that will be used a function wrapper. For the Alpha target
-(in <tt>AlphaJITInfo.cpp</tt>), the <tt>getLazyResolverFunction</tt>
-implementation is simply:
-</p>
-
-<div class="doc_code">
-<pre>
-TargetJITInfo::LazyResolverFn AlphaJITInfo::getLazyResolverFunction(  
-                                            JITCompilerFn F) {
-  JITCompilerFunction = F;
-  return AlphaCompilationCallback;
-}
-</pre>
-</div>
-
-<p>
-For the X86 target, the <tt>getLazyResolverFunction</tt> implementation is a
-little more complication, because it returns a different callback function for
-processors with SSE instructions and XMM registers.
-</p>
-
-<p>
-The callback function initially saves and later restores the callee register
-values, incoming arguments, and frame and return address. The callback function
-needs low-level access to the registers or stack, so it is typically implemented
-with assembler.
-</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
-
-  <a href="http://www.woo.com">Mason Woo</a> and <a href="http://misha.brukman.net">Misha Brukman</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
-  <br>
-  Last modified: $Date$
-</address>
-
-</body>
-</html>
diff --git a/docs/WritingAnLLVMBackend.rst b/docs/WritingAnLLVMBackend.rst
new file mode 100644
index 0000000000..7803163ae6
--- /dev/null
+++ b/docs/WritingAnLLVMBackend.rst
@@ -0,0 +1,1835 @@
+================================
+Writing an LLVM Compiler Backend
+================================
+
+.. sectionauthor:: Mason Woo <http://www.woo.com> and Misha Brukman <http://misha.brukman.net>
+
+.. contents::
+   :local:
+
+Introduction
+============
+
+This document describes techniques for writing compiler backends that convert
+the LLVM Intermediate Representation (IR) to code for a specified machine or
+other languages.  Code intended for a specific machine can take the form of
+either assembly code or binary code (usable for a JIT compiler).
+
+The backend of LLVM features a target-independent code generator that may
+create output for several types of target CPUs --- including X86, PowerPC,
+ARM, and SPARC.  The backend may also be used to generate code targeted at SPUs
+of the Cell processor or GPUs to support the execution of compute kernels.
+
+The document focuses on existing examples found in subdirectories of
+``llvm/lib/Target`` in a downloaded LLVM release.  In particular, this document
+focuses on the example of creating a static compiler (one that emits text
+assembly) for a SPARC target, because SPARC has fairly standard
+characteristics, such as a RISC instruction set and straightforward calling
+conventions.
+
+Audience
+--------
+
+The audience for this document is anyone who needs to write an LLVM backend to
+generate code for a specific hardware or software target.
+
+Prerequisite Reading
+--------------------
+
+These essential documents must be read before reading this document:
+
+* `LLVM Language Reference Manual <LangRef.html>`_ --- a reference manual for
+  the LLVM assembly language.
+
+* :doc:`CodeGenerator` --- a guide to the components (classes and code
+  generation algorithms) for translating the LLVM internal representation into
+  machine code for a specified target.  Pay particular attention to the
+  descriptions of code generation stages: Instruction Selection, Scheduling and
+  Formation, SSA-based Optimization, Register Allocation, Prolog/Epilog Code
+  Insertion, Late Machine Code Optimizations, and Code Emission.
+
+* :doc:`TableGenFundamentals` --- a document that describes the TableGen
+  (``tblgen``) application that manages domain-specific information to support
+  LLVM code generation.  TableGen processes input from a target description
+  file (``.td`` suffix) and generates C++ code that can be used for code
+  generation.
+
+* `Writing an LLVM Pass <WritingAnLLVMPass.html>`_ --- The assembly printer is
+  a ``FunctionPass``, as are several SelectionDAG processing steps.
+
+To follow the SPARC examples in this document, have a copy of `The SPARC
+Architecture Manual, Version 8 <http://www.sparc.org/standards/V8.pdf>`_ for
+reference.  For details about the ARM instruction set, refer to the `ARM
+Architecture Reference Manual <http://infocenter.arm.com/>`_.  For more about
+the GNU Assembler format (``GAS``), see `Using As
+<http://sourceware.org/binutils/docs/as/index.html>`_, especially for the
+assembly printer.  "Using As" contains a list of target machine dependent
+features.
+
+Basic Steps
+-----------
+
+To write a compiler backend for LLVM that converts the LLVM IR to code for a
+specified target (machine or other language), follow these steps:
+
+* Create a subclass of the ``TargetMachine`` class that describes
+  characteristics of your target machine.  Copy existing examples of specific
+  ``TargetMachine`` class and header files; for example, start with
+  ``SparcTargetMachine.cpp`` and ``SparcTargetMachine.h``, but change the file
+  names for your target.  Similarly, change code that references "``Sparc``" to
+  reference your target.
+
+* Describe the register set of the target.  Use TableGen to generate code for
+  register definition, register aliases, and register classes from a
+  target-specific ``RegisterInfo.td`` input file.  You should also write
+  additional code for a subclass of the ``TargetRegisterInfo`` class that
+  represents the class register file data used for register allocation and also
+  describes the interactions between registers.
+
+* Describe the instruction set of the target.  Use TableGen to generate code
+  for target-specific instructions from target-specific versions of
+  ``TargetInstrFormats.td`` and ``TargetInstrInfo.td``.  You should write
+  additional code for a subclass of the ``TargetInstrInfo`` class to represent
+  machine instructions supported by the target machine.
+
+* Describe the selection and conversion of the LLVM IR from a Directed Acyclic
+  Graph (DAG) representation of instructions to native target-specific
+  instructions.  Use TableGen to generate code that matches patterns and
+  selects instructions based on additional information in a target-specific
+  version of ``TargetInstrInfo.td``.  Write code for ``XXXISelDAGToDAG.cpp``,
+  where ``XXX`` identifies the specific target, to perform pattern matching and
+  DAG-to-DAG instruction selection.  Also write code in ``XXXISelLowering.cpp``
+  to replace or remove operations and data types that are not supported
+  natively in a SelectionDAG.
+
+* Write code for an assembly printer that converts LLVM IR to a GAS format for
+  your target machine.  You should add assembly strings to the instructions
+  defined in your target-specific version of ``TargetInstrInfo.td``.  You
+  should also write code for a subclass of ``AsmPrinter`` that performs the
+  LLVM-to-assembly conversion and a trivial subclass of ``TargetAsmInfo``.
+
+* Optionally, add support for subtargets (i.e., variants with different
+  capabilities).  You should also write code for a subclass of the
+  ``TargetSubtarget`` class, which allows you to use the ``-mcpu=`` and
+  ``-mattr=`` command-line options.
+
+* Optionally, add JIT support and create a machine code emitter (subclass of
+  ``TargetJITInfo``) that is used to emit binary code directly into memory.
+
+In the ``.cpp`` and ``.h``. files, initially stub up these methods and then
+implement them later.  Initially, you may not know which private members that
+the class will need and which components will need to be subclassed.
+
+Preliminaries
+-------------
+
+To actually create your compiler backend, you need to create and modify a few
+files.  The absolute minimum is discussed here.  But to actually use the LLVM
+target-independent code generator, you must perform the steps described in the
+:doc:`LLVM Target-Independent Code Generator <CodeGenerator>` document.
+
+First, you should create a subdirectory under ``lib/Target`` to hold all the
+files related to your target.  If your target is called "Dummy", create the
+directory ``lib/Target/Dummy``.
+
+In this new directory, create a ``Makefile``.  It is easiest to copy a
+``Makefile`` of another target and modify it.  It should at least contain the
+``LEVEL``, ``LIBRARYNAME`` and ``TARGET`` variables, and then include
+``$(LEVEL)/Makefile.common``.  The library can be named ``LLVMDummy`` (for
+example, see the MIPS target).  Alternatively, you can split the library into
+``LLVMDummyCodeGen`` and ``LLVMDummyAsmPrinter``, the latter of which should be
+implemented in a subdirectory below ``lib/Target/Dummy`` (for example, see the
+PowerPC target).
+
+Note that these two naming schemes are hardcoded into ``llvm-config``.  Using
+any other naming scheme will confuse ``llvm-config`` and produce a lot of
+(seemingly unrelated) linker errors when linking ``llc``.
+
+To make your target actually do something, you need to implement a subclass of
+``TargetMachine``.  This implementation should typically be in the file
+``lib/Target/DummyTargetMachine.cpp``, but any file in the ``lib/Target``
+directory will be built and should work.  To use LLVM's target independent code
+generator, you should do what all current machine backends do: create a
+subclass of ``LLVMTargetMachine``.  (To create a target from scratch, create a
+subclass of ``TargetMachine``.)
+
+To get LLVM to actually build and link your target, you need to add it to the
+``TARGETS_TO_BUILD`` variable.  To do this, you modify the configure script to
+know about your target when parsing the ``--enable-targets`` option.  Search
+the configure script for ``TARGETS_TO_BUILD``, add your target to the lists
+there (some creativity required), and then reconfigure.  Alternatively, you can
+change ``autotools/configure.ac`` and regenerate configure by running
+``./autoconf/AutoRegen.sh``.
+
+Target Machine
+==============
+
+``LLVMTargetMachine`` is designed as a base class for targets implemented with
+the LLVM target-independent code generator.  The ``LLVMTargetMachine`` class
+should be specialized by a concrete target class that implements the various
+virtual methods.  ``LLVMTargetMachine`` is defined as a subclass of
+``TargetMachine`` in ``include/llvm/Target/TargetMachine.h``.  The
+``TargetMachine`` class implementation (``TargetMachine.cpp``) also processes
+numerous command-line options.
+
+To create a concrete target-specific subclass of ``LLVMTargetMachine``, start
+by copying an existing ``TargetMachine`` class and header.  You should name the
+files that you create to reflect your specific target.  For instance, for the
+SPARC target, name the files ``SparcTargetMachine.h`` and
+``SparcTargetMachine.cpp``.
+
+For a target machine ``XXX``, the implementation of ``XXXTargetMachine`` must
+have access methods to obtain objects that represent target components.  These
+methods are named ``get*Info``, and are intended to obtain the instruction set
+(``getInstrInfo``), register set (``getRegisterInfo``), stack frame layout
+(``getFrameInfo``), and similar information.  ``XXXTargetMachine`` must also
+implement the ``getDataLayout`` method to access an object with target-specific
+data characteristics, such as data type size and alignment requirements.
+
+For instance, for the SPARC target, the header file ``SparcTargetMachine.h``
+declares prototypes for several ``get*Info`` and ``getDataLayout`` methods that
+simply return a class member.
+
+.. code-block:: c++
+
+  namespace llvm {
+
+  class Module;
+
+  class SparcTargetMachine : public LLVMTargetMachine {
+    const DataLayout DataLayout;       // Calculates type size & alignment
+    SparcSubtarget Subtarget;
+    SparcInstrInfo InstrInfo;
+    TargetFrameInfo FrameInfo;
+
+  protected:
+    virtual const TargetAsmInfo *createTargetAsmInfo() const;
+
+  public:
+    SparcTargetMachine(const Module &M, const std::string &FS);
+
+    virtual const SparcInstrInfo *getInstrInfo() const {return &InstrInfo; }
+    virtual const TargetFrameInfo *getFrameInfo() const {return &FrameInfo; }
+    virtual const TargetSubtarget *getSubtargetImpl() const{return &Subtarget; }
+    virtual const TargetRegisterInfo *getRegisterInfo() const {
+      return &InstrInfo.getRegisterInfo();
+    }
+    virtual const DataLayout *getDataLayout() const { return &DataLayout; }
+    static unsigned getModuleMatchQuality(const Module &M);
+
+    // Pass Pipeline Configuration
+    virtual bool addInstSelector(PassManagerBase &PM, bool Fast);
+    virtual bool addPreEmitPass(PassManagerBase &PM, bool Fast);
+  };
+
+  } // end namespace llvm
+
+* ``getInstrInfo()``
+* ``getRegisterInfo()``
+* ``getFrameInfo()``
+* ``getDataLayout()``
+* ``getSubtargetImpl()``
+
+For some targets, you also need to support the following methods:
+
+* ``getTargetLowering()``
+* ``getJITInfo()``
+
+In addition, the ``XXXTargetMachine`` constructor should specify a
+``TargetDescription`` string that determines the data layout for the target
+machine, including characteristics such as pointer size, alignment, and
+endianness.  For example, the constructor for ``SparcTargetMachine`` contains
+the following:
+
+.. code-block:: c++
+
+  SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS)
+    : DataLayout("E-p:32:32-f128:128:128"),
+      Subtarget(M, FS), InstrInfo(Subtarget),
+      FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
+  }
+
+Hyphens separate portions of the ``TargetDescription`` string.
+
+* An upper-case "``E``" in the string indicates a big-endian target data model.
+  A lower-case "``e``" indicates little-endian.
+
+* "``p:``" is followed by pointer information: size, ABI alignment, and
+  preferred alignment.  If only two figures follow "``p:``", then the first
+  value is pointer size, and the second value is both ABI and preferred
+  alignment.
+
+* Then a letter for numeric type alignment: "``i``", "``f``", "``v``", or
+  "``a``" (corresponding to integer, floating point, vector, or aggregate).
+  "``i``", "``v``", or "``a``" are followed by ABI alignment and preferred
+  alignment. "``f``" is followed by three values: the first indicates the size
+  of a long double, then ABI alignment, and then ABI preferred alignment.
+
+Target Registration
+===================
+
+You must also register your target with the ``TargetRegistry``, which is what
+other LLVM tools use to be able to lookup and use your target at runtime.  The
+``TargetRegistry`` can be used directly, but for most targets there are helper
+templates which should take care of the work for you.
+
+All targets should declare a global ``Target`` object which is used to
+represent the target during registration.  Then, in the target's ``TargetInfo``
+library, the target should define that object and use the ``RegisterTarget``
+template to register the target.  For example, the Sparc registration code
+looks like this:
+
+.. code-block:: c++
+
+  Target llvm::TheSparcTarget;
+
+  extern "C" void LLVMInitializeSparcTargetInfo() {
+    RegisterTarget<Triple::sparc, /*HasJIT=*/false>
+      X(TheSparcTarget, "sparc", "Sparc");
+  }
+
+This allows the ``TargetRegistry`` to look up the target by name or by target
+triple.  In addition, most targets will also register additional features which
+are available in separate libraries.  These registration steps are separate,
+because some clients may wish to only link in some parts of the target --- the
+JIT code generator does not require the use of the assembler printer, for
+example.  Here is an example of registering the Sparc assembly printer:
+
+.. code-block:: c++
+
+  extern "C" void LLVMInitializeSparcAsmPrinter() {
+    RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
+  }
+
+For more information, see "`llvm/Target/TargetRegistry.h
+</doxygen/TargetRegistry_8h-source.html>`_".
+
+Register Set and Register Classes
+=================================
+
+You should describe a concrete target-specific class that represents the
+register file of a target machine.  This class is called ``XXXRegisterInfo``
+(where ``XXX`` identifies the target) and represents the class register file
+data that is used for register allocation.  It also describes the interactions
+between registers.
+
+You also need to define register classes to categorize related registers.  A
+register class should be added for groups of registers that are all treated the
+same way for some instruction.  Typical examples are register classes for
+integer, floating-point, or vector registers.  A register allocator allows an
+instruction to use any register in a specified register class to perform the
+instruction in a similar manner.  Register classes allocate virtual registers
+to instructions from these sets, and register classes let the
+target-independent register allocator automatically choose the actual
+registers.
+
+Much of the code for registers, including register definition, register
+aliases, and register classes, is generated by TableGen from
+``XXXRegisterInfo.td`` input files and placed in ``XXXGenRegisterInfo.h.inc``
+and ``XXXGenRegisterInfo.inc`` output files.  Some of the code in the
+implementation of ``XXXRegisterInfo`` requires hand-coding.
+
+Defining a Register
+-------------------
+
+The ``XXXRegisterInfo.td`` file typically starts with register definitions for
+a target machine.  The ``Register`` class (specified in ``Target.td``) is used
+to define an object for each register.  The specified string ``n`` becomes the
+``Name`` of the register.  The basic ``Register`` object does not have any
+subregisters and does not specify any aliases.
+
+.. code-block:: llvm
+
+  class Register<string n> {
+    string Namespace = "";
+    string AsmName = n;
+    string Name = n;
+    int SpillSize = 0;
+    int SpillAlignment = 0;
+    list<Register> Aliases = [];
+    list<Register> SubRegs = [];
+    list<int> DwarfNumbers = [];
+  }
+
+For example, in the ``X86RegisterInfo.td`` file, there are register definitions
+that utilize the ``Register`` class, such as:
+
+.. code-block:: llvm
+
+  def AL : Register<"AL">, DwarfRegNum<[0, 0, 0]>;
+
+This defines the register ``AL`` and assigns it values (with ``DwarfRegNum``)
+that are used by ``gcc``, ``gdb``, or a debug information writer to identify a
+register.  For register ``AL``, ``DwarfRegNum`` takes an array of 3 values
+representing 3 different modes: the first element is for X86-64, the second for
+exception handling (EH) on X86-32, and the third is generic. -1 is a special
+Dwarf number that indicates the gcc number is undefined, and -2 indicates the
+register number is invalid for this mode.
+
+From the previously described line in the ``X86RegisterInfo.td`` file, TableGen
+generates this code in the ``X86GenRegisterInfo.inc`` file:
+
+.. code-block:: c++
+
+  static const unsigned GR8[] = { X86::AL, ... };
+
+  const unsigned AL_AliasSet[] = { X86::AX, X86::EAX, X86::RAX, 0 };
+
+  const TargetRegisterDesc RegisterDescriptors[] = {
+    ...
+  { "AL", "AL", AL_AliasSet, Empty_SubRegsSet, Empty_SubRegsSet, AL_SuperRegsSet }, ...
+
+From the register info file, TableGen generates a ``TargetRegisterDesc`` object
+for each register.  ``TargetRegisterDesc`` is defined in
+``include/llvm/Target/TargetRegisterInfo.h`` with the following fields:
+
+.. code-block:: c++
+
+  struct TargetRegisterDesc {
+    const char     *AsmName;      // Assembly language name for the register
+    const char     *Name;         // Printable name for the reg (for debugging)
+    const unsigned *AliasSet;     // Register Alias Set
+    const unsigned *SubRegs;      // Sub-register set
+    const unsigned *ImmSubRegs;   // Immediate sub-register set
+    const unsigned *SuperRegs;    // Super-register set
+  };
+
+TableGen uses the entire target description file (``.td``) to determine text
+names for the register (in the ``AsmName`` and ``Name`` fields of
+``TargetRegisterDesc``) and the relationships of other registers to the defined
+register (in the other ``TargetRegisterDesc`` fields).  In this example, other
+definitions establish the registers "``AX``", "``EAX``", and "``RAX``" as
+aliases for one another, so TableGen generates a null-terminated array
+(``AL_AliasSet``) for this register alias set.
+
+The ``Register`` class is commonly used as a base class for more complex
+classes.  In ``Target.td``, the ``Register`` class is the base for the
+``RegisterWithSubRegs`` class that is used to define registers that need to
+specify subregisters in the ``SubRegs`` list, as shown here:
+
+.. code-block:: llvm
+
+  class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> {
+    let SubRegs = subregs;
+  }
+
+In ``SparcRegisterInfo.td``, additional register classes are defined for SPARC:
+a ``Register`` subclass, ``SparcReg``, and further subclasses: ``Ri``, ``Rf``,
+and ``Rd``.  SPARC registers are identified by 5-bit ID numbers, which is a
+feature common to these subclasses.  Note the use of "``let``" expressions to
+override values that are initially defined in a superclass (such as ``SubRegs``
+field in the ``Rd`` class).
+
+.. code-block:: llvm
+
+  class SparcReg<string n> : Register<n> {
+    field bits<5> Num;
+    let Namespace = "SP";
+  }
+  // Ri - 32-bit integer registers
+  class Ri<bits<5> num, string n> :
+  SparcReg<n> {
+    let Num = num;
+  }
+  // Rf - 32-bit floating-point registers
+  class Rf<bits<5> num, string n> :
+  SparcReg<n> {
+    let Num = num;
+  }
+  // Rd - Slots in the FP register file for 64-bit floating-point values.
+  class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
+    let Num = num;
+    let SubRegs = subregs;
+  }
+
+In the ``SparcRegisterInfo.td`` file, there are register definitions that
+utilize these subclasses of ``Register``, such as:
+
+.. code-block:: llvm
+
+  def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
+  def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
+  ...
+  def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>;
+  def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>;
+  ...
+  def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>;
+  def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>;
+
+The last two registers shown above (``D0`` and ``D1``) are double-precision
+floating-point registers that are aliases for pairs of single-precision
+floating-point sub-registers.  In addition to aliases, the sub-register and
+super-register relationships of the defined register are in fields of a
+register's ``TargetRegisterDesc``.
+
+Defining a Register Class
+-------------------------
+
+The ``RegisterClass`` class (specified in ``Target.td``) is used to define an
+object that represents a group of related registers and also defines the
+default allocation order of the registers.  A target description file
+``XXXRegisterInfo.td`` that uses ``Target.td`` can construct register classes
+using the following class:
+
+.. code-block:: llvm
+
+  class RegisterClass<string namespace,
+  list<ValueType> regTypes, int alignment, dag regList> {
+    string Namespace = namespace;
+    list<ValueType> RegTypes = regTypes;
+    int Size = 0;  // spill size, in bits; zero lets tblgen pick the size
+    int Alignment = alignment;
+
+    // CopyCost is the cost of copying a value between two registers
+    // default value 1 means a single instruction
+    // A negative value means copying is extremely expensive or impossible
+    int CopyCost = 1;
+    dag MemberList = regList;
+
+    // for register classes that are subregisters of this class
+    list<RegisterClass> SubRegClassList = [];
+
+    code MethodProtos = [{}];  // to insert arbitrary code
+    code MethodBodies = [{}];
+  }
+
+To define a ``RegisterClass``, use the following 4 arguments:
+
+* The first argument of the definition is the name of the namespace.
+
+* The second argument is a list of ``ValueType`` register type values that are
+  defined in ``include/llvm/CodeGen/ValueTypes.td``.  Defined values include
+  integer types (such as ``i16``, ``i32``, and ``i1`` for Boolean),
+  floating-point types (``f32``, ``f64``), and vector types (for example,
+  ``v8i16`` for an ``8 x i16`` vector).  All registers in a ``RegisterClass``
+  must have the same ``ValueType``, but some registers may store vector data in
+  different configurations.  For example a register that can process a 128-bit
+  vector may be able to handle 16 8-bit integer elements, 8 16-bit integers, 4
+  32-bit integers, and so on.
+
+* The third argument of the ``RegisterClass`` definition specifies the
+  alignment required of the registers when they are stored or loaded to
+  memory.
+
+* The final argument, ``regList``, specifies which registers are in this class.
+  If an alternative allocation order method is not specified, then ``regList``
+  also defines the order of allocation used by the register allocator.  Besides
+  simply listing registers with ``(add R0, R1, ...)``, more advanced set
+  operators are available.  See ``include/llvm/Target/Target.td`` for more
+  information.
+
+In ``SparcRegisterInfo.td``, three ``RegisterClass`` objects are defined:
+``FPRegs``, ``DFPRegs``, and ``IntRegs``.  For all three register classes, the
+first argument defines the namespace with the string "``SP``".  ``FPRegs``
+defines a group of 32 single-precision floating-point registers (``F0`` to
+``F31``); ``DFPRegs`` defines a group of 16 double-precision registers
+(``D0-D15``).
+
+.. code-block:: llvm
+
+  // F0, F1, F2, ..., F31
+  def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
+
+  def DFPRegs : RegisterClass<"SP", [f64], 64,
+                              (add D0, D1, D2, D3, D4, D5, D6, D7, D8,
+                                   D9, D10, D11, D12, D13, D14, D15)>;
+
+  def IntRegs : RegisterClass<"SP", [i32], 32,
+      (add L0, L1, L2, L3, L4, L5, L6, L7,
+           I0, I1, I2, I3, I4, I5,
+           O0, O1, O2, O3, O4, O5, O7,
+           G1,
+           // Non-allocatable regs:
+           G2, G3, G4,
+           O6,        // stack ptr
+           I6,        // frame ptr
+           I7,        // return address
+           G0,        // constant zero
+           G5, G6, G7 // reserved for kernel
+      )>;
+
+Using ``SparcRegisterInfo.td`` with TableGen generates several output files
+that are intended for inclusion in other source code that you write.
+``SparcRegisterInfo.td`` generates ``SparcGenRegisterInfo.h.inc``, which should
+be included in the header file for the implementation of the SPARC register
+implementation that you write (``SparcRegisterInfo.h``).  In
+``SparcGenRegisterInfo.h.inc`` a new structure is defined called
+``SparcGenRegisterInfo`` that uses ``TargetRegisterInfo`` as its base.  It also
+specifies types, based upon the defined register classes: ``DFPRegsClass``,
+``FPRegsClass``, and ``IntRegsClass``.
+
+``SparcRegisterInfo.td`` also generates ``SparcGenRegisterInfo.inc``, which is
+included at the bottom of ``SparcRegisterInfo.cpp``, the SPARC register
+implementation.  The code below shows only the generated integer registers and
+associated register classes.  The order of registers in ``IntRegs`` reflects
+the order in the definition of ``IntRegs`` in the target description file.
+
+.. code-block:: c++
+
+  // IntRegs Register Class...
+  static const unsigned IntRegs[] = {
+    SP::L0, SP::L1, SP::L2, SP::L3, SP::L4, SP::L5,
+    SP::L6, SP::L7, SP::I0, SP::I1, SP::I2, SP::I3,
+    SP::I4, SP::I5, SP::O0, SP::O1, SP::O2, SP::O3,
+    SP::O4, SP::O5, SP::O7, SP::G1, SP::G2, SP::G3,
+    SP::G4, SP::O6, SP::I6, SP::I7, SP::G0, SP::G5,
+    SP::G6, SP::G7,
+  };
+
+  // IntRegsVTs Register Class Value Types...
+  static const MVT::ValueType IntRegsVTs[] = {
+    MVT::i32, MVT::Other
+  };
+
+  namespace SP {   // Register class instances
+    DFPRegsClass    DFPRegsRegClass;
+    FPRegsClass     FPRegsRegClass;
+    IntRegsClass    IntRegsRegClass;
+  ...
+    // IntRegs Sub-register Classess...
+    static const TargetRegisterClass* const IntRegsSubRegClasses [] = {
+      NULL
+    };
+  ...
+    // IntRegs Super-register Classess...
+    static const TargetRegisterClass* const IntRegsSuperRegClasses [] = {
+      NULL
+    };
+  ...
+    // IntRegs Register Class sub-classes...
+    static const TargetRegisterClass* const IntRegsSubclasses [] = {
+      NULL
+    };
+  ...
+    // IntRegs Register Class super-classes...
+    static const TargetRegisterClass* const IntRegsSuperclasses [] = {
+      NULL
+    };
+
+    IntRegsClass::IntRegsClass() : TargetRegisterClass(IntRegsRegClassID,
+      IntRegsVTs, IntRegsSubclasses, IntRegsSuperclasses, IntRegsSubRegClasses,
+      IntRegsSuperRegClasses, 4, 4, 1, IntRegs, IntRegs + 32) {}
+  }
+
+The register allocators will avoid using reserved registers, and callee saved
+registers are not used until all the volatile registers have been used.  That
+is usually good enough, but in some cases it may be necessary to provide custom
+allocation orders.
+
+Implement a subclass of ``TargetRegisterInfo``
+----------------------------------------------
+
+The final step is to hand code portions of ``XXXRegisterInfo``, which
+implements the interface described in ``TargetRegisterInfo.h`` (see
+:ref:`TargetRegisterInfo`).  These functions return ``0``, ``NULL``, or
+``false``, unless overridden.  Here is a list of functions that are overridden
+for the SPARC implementation in ``SparcRegisterInfo.cpp``:
+
+* ``getCalleeSavedRegs`` --- Returns a list of callee-saved registers in the
+  order of the desired callee-save stack frame offset.
+
+* ``getReservedRegs`` --- Returns a bitset indexed by physical register
+  numbers, indicating if a particular register is unavailable.
+
+* ``hasFP`` --- Return a Boolean indicating if a function should have a
+  dedicated frame pointer register.
+
+* ``eliminateCallFramePseudoInstr`` --- If call frame setup or destroy pseudo
+  instructions are used, this can be called to eliminate them.
+
+* ``eliminateFrameIndex`` --- Eliminate abstract frame indices from
+  instructions that may use them.
+
+* ``emitPrologue`` --- Insert prologue code into the function.
+
+* ``emitEpilogue`` --- Insert epilogue code into the function.
+
+.. _instruction-set:
+
+Instruction Set
+===============
+
+During the early stages of code generation, the LLVM IR code is converted to a
+``SelectionDAG`` with nodes that are instances of the ``SDNode`` class
+containing target instructions.  An ``SDNode`` has an opcode, operands, type
+requirements, and operation properties.  For example, is an operation
+commutative, does an operation load from memory.  The various operation node
+types are described in the ``include/llvm/CodeGen/SelectionDAGNodes.h`` file
+(values of the ``NodeType`` enum in the ``ISD`` namespace).
+
+TableGen uses the following target description (``.td``) input files to
+generate much of the code for instruction definition:
+
+* ``Target.td`` --- Where the ``Instruction``, ``Operand``, ``InstrInfo``, and
+  other fundamental classes are defined.
+
+* ``TargetSelectionDAG.td`` --- Used by ``SelectionDAG`` instruction selection
+  generators, contains ``SDTC*`` classes (selection DAG type constraint),
+  definitions of ``SelectionDAG`` nodes (such as ``imm``, ``cond``, ``bb``,
+  ``add``, ``fadd``, ``sub``), and pattern support (``Pattern``, ``Pat``,
+  ``PatFrag``, ``PatLeaf``, ``ComplexPattern``.
+
+* ``XXXInstrFormats.td`` --- Patterns for definitions of target-specific
+  instructions.
+
+* ``XXXInstrInfo.td`` --- Target-specific definitions of instruction templates,
+  condition codes, and instructions of an instruction set.  For architecture
+  modifications, a different file name may be used.  For example, for Pentium
+  with SSE instruction, this file is ``X86InstrSSE.td``, and for Pentium with
+  MMX, this file is ``X86InstrMMX.td``.
+
+There is also a target-specific ``XXX.td`` file, where ``XXX`` is the name of
+the target.  The ``XXX.td`` file includes the other ``.td`` input files, but
+its contents are only directly important for subtargets.
+
+You should describe a concrete target-specific class ``XXXInstrInfo`` that
+represents machine instructions supported by a target machine.
+``XXXInstrInfo`` contains an array of ``XXXInstrDescriptor`` objects, each of
+which describes one instruction.  An instruction descriptor defines:
+
+* Opcode mnemonic
+* Number of operands
+* List of implicit register definitions and uses
+* Target-independent properties (such as memory access, is commutable)
+* Target-specific flags
+
+The Instruction class (defined in ``Target.td``) is mostly used as a base for
+more complex instruction classes.
+
+.. code-block:: llvm
+
+  class Instruction {
+    string Namespace = "";
+    dag OutOperandList;    // A dag containing the MI def operand list.
+    dag InOperandList;     // A dag containing the MI use operand list.
+    string AsmString = ""; // The .s format to print the instruction with.
+    list<dag> Pattern;     // Set to the DAG pattern for this instruction.
+    list<Register> Uses = [];
+    list<Register> Defs = [];
+    list<Predicate> Predicates = [];  // predicates turned into isel match code
+    ... remainder not shown for space ...
+  }
+
+A ``SelectionDAG`` node (``SDNode``) should contain an object representing a
+target-specific instruction that is defined in ``XXXInstrInfo.td``.  The
+instruction objects should represent instructions from the architecture manual
+of the target machine (such as the SPARC Architecture Manual for the SPARC
+target).
+
+A single instruction from the architecture manual is often modeled as multiple
+target instructions, depending upon its operands.  For example, a manual might
+describe an add instruction that takes a register or an immediate operand.  An
+LLVM target could model this with two instructions named ``ADDri`` and
+``ADDrr``.
+
+You should define a class for each instruction category and define each opcode
+as a subclass of the category with appropriate parameters such as the fixed
+binary encoding of opcodes and extended opcodes.  You should map the register
+bits to the bits of the instruction in which they are encoded (for the JIT).
+Also you should specify how the instruction should be printed when the
+automatic assembly printer is used.
+
+As is described in the SPARC Architecture Manual, Version 8, there are three
+major 32-bit formats for instructions.  Format 1 is only for the ``CALL``
+instruction.  Format 2 is for branch on condition codes and ``SETHI`` (set high
+bits of a register) instructions.  Format 3 is for other instructions.
+
+Each of these formats has corresponding classes in ``SparcInstrFormat.td``.
+``InstSP`` is a base class for other instruction classes.  Additional base
+classes are specified for more precise formats: for example in
+``SparcInstrFormat.td``, ``F2_1`` is for ``SETHI``, and ``F2_2`` is for
+branches.  There are three other base classes: ``F3_1`` for register/register
+operations, ``F3_2`` for register/immediate operations, and ``F3_3`` for
+floating-point operations.  ``SparcInstrInfo.td`` also adds the base class
+``Pseudo`` for synthetic SPARC instructions.
+
+``SparcInstrInfo.td`` largely consists of operand and instruction definitions
+for the SPARC target.  In ``SparcInstrInfo.td``, the following target
+description file entry, ``LDrr``, defines the Load Integer instruction for a
+Word (the ``LD`` SPARC opcode) from a memory address to a register.  The first
+parameter, the value 3 (``11``\ :sub:`2`), is the operation value for this
+category of operation.  The second parameter (``000000``\ :sub:`2`) is the
+specific operation value for ``LD``/Load Word.  The third parameter is the
+output destination, which is a register operand and defined in the ``Register``
+target description file (``IntRegs``).
+
+.. code-block:: llvm
+
+  def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr),
+                   "ld [$addr], $dst",
+                   [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+
+The fourth parameter is the input source, which uses the address operand
+``MEMrr`` that is defined earlier in ``SparcInstrInfo.td``:
+
+.. code-block:: llvm
+
+  def MEMrr : Operand<i32> {
+    let PrintMethod = "printMemOperand";
+    let MIOperandInfo = (ops IntRegs, IntRegs);
+  }
+
+The fifth parameter is a string that is used by the assembly printer and can be
+left as an empty string until the assembly printer interface is implemented.
+The sixth and final parameter is the pattern used to match the instruction
+during the SelectionDAG Select Phase described in :doc:`CodeGenerator`.
+This parameter is detailed in the next section, :ref:`instruction-selector`.
+
+Instruction class definitions are not overloaded for different operand types,
+so separate versions of instructions are needed for register, memory, or
+immediate value operands.  For example, to perform a Load Integer instruction
+for a Word from an immediate operand to a register, the following instruction
+class is defined:
+
+.. code-block:: llvm
+
+  def LDri : F3_2 <3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr),
+                   "ld [$addr], $dst",
+                   [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+
+Writing these definitions for so many similar instructions can involve a lot of
+cut and paste.  In ``.td`` files, the ``multiclass`` directive enables the
+creation of templates to define several instruction classes at once (using the
+``defm`` directive).  For example in ``SparcInstrInfo.td``, the ``multiclass``
+pattern ``F3_12`` is defined to create 2 instruction classes each time
+``F3_12`` is invoked:
+
+.. code-block:: llvm
+
+  multiclass F3_12 <string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+    def rr  : F3_1 <2, Op3Val,
+                   (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                   !strconcat(OpcStr, " $b, $c, $dst"),
+                   [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+    def ri  : F3_2 <2, Op3Val,
+                   (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+                   !strconcat(OpcStr, " $b, $c, $dst"),
+                   [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+  }
+
+So when the ``defm`` directive is used for the ``XOR`` and ``ADD``
+instructions, as seen below, it creates four instruction objects: ``XORrr``,
+``XORri``, ``ADDrr``, and ``ADDri``.
+
+.. code-block:: llvm
+
+  defm XOR   : F3_12<"xor", 0b000011, xor>;
+  defm ADD   : F3_12<"add", 0b000000, add>;
+
+``SparcInstrInfo.td`` also includes definitions for condition codes that are
+referenced by branch instructions.  The following definitions in
+``SparcInstrInfo.td`` indicate the bit location of the SPARC condition code.
+For example, the 10\ :sup:`th` bit represents the "greater than" condition for
+integers, and the 22\ :sup:`nd` bit represents the "greater than" condition for
+floats.
+
+.. code-block:: llvm
+
+  def ICC_NE  : ICC_VAL< 9>;  // Not Equal
+  def ICC_E   : ICC_VAL< 1>;  // Equal
+  def ICC_G   : ICC_VAL<10>;  // Greater
+  ...
+  def FCC_U   : FCC_VAL<23>;  // Unordered
+  def FCC_G   : FCC_VAL<22>;  // Greater
+  def FCC_UG  : FCC_VAL<21>;  // Unordered or Greater
+  ...
+
+(Note that ``Sparc.h`` also defines enums that correspond to the same SPARC
+condition codes.  Care must be taken to ensure the values in ``Sparc.h``
+correspond to the values in ``SparcInstrInfo.td``.  I.e., ``SPCC::ICC_NE = 9``,
+``SPCC::FCC_U = 23`` and so on.)
+
+Instruction Operand Mapping
+---------------------------
+
+The code generator backend maps instruction operands to fields in the
+instruction.  Operands are assigned to unbound fields in the instruction in the
+order they are defined.  Fields are bound when they are assigned a value.  For
+example, the Sparc target defines the ``XNORrr`` instruction as a ``F3_1``
+format instruction having three operands.
+
+.. code-block:: llvm
+
+  def XNORrr  : F3_1<2, 0b000111,
+                     (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                     "xnor $b, $c, $dst",
+                     [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+
+The instruction templates in ``SparcInstrFormats.td`` show the base class for
+``F3_1`` is ``InstSP``.
+
+.. code-block:: llvm
+
+  class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
+    field bits<32> Inst;
+    let Namespace = "SP";
+    bits<2> op;
+    let Inst{31-30} = op;
+    dag OutOperandList = outs;
+    dag InOperandList = ins;
+    let AsmString   = asmstr;
+    let Pattern = pattern;
+  }
+
+``InstSP`` leaves the ``op`` field unbound.
+
+.. code-block:: llvm
+
+  class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
+      : InstSP<outs, ins, asmstr, pattern> {
+    bits<5> rd;
+    bits<6> op3;
+    bits<5> rs1;
+    let op{1} = 1;   // Op = 2 or 3
+    let Inst{29-25} = rd;
+    let Inst{24-19} = op3;
+    let Inst{18-14} = rs1;
+  }
+
+``F3`` binds the ``op`` field and defines the ``rd``, ``op3``, and ``rs1``
+fields.  ``F3`` format instructions will bind the operands ``rd``, ``op3``, and
+``rs1`` fields.
+
+.. code-block:: llvm
+
+  class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+             string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+    bits<8> asi = 0; // asi not currently used
+    bits<5> rs2;
+    let op         = opVal;
+    let op3        = op3val;
+    let Inst{13}   = 0;     // i field = 0
+    let Inst{12-5} = asi;   // address space identifier
+    let Inst{4-0}  = rs2;
+  }
+
+``F3_1`` binds the ``op3`` field and defines the ``rs2`` fields.  ``F3_1``
+format instructions will bind the operands to the ``rd``, ``rs1``, and ``rs2``
+fields.  This results in the ``XNORrr`` instruction binding ``$dst``, ``$b``,
+and ``$c`` operands to the ``rd``, ``rs1``, and ``rs2`` fields respectively.
+
+Instruction Relation Mapping
+----------------------------
+
+This TableGen feature is used to relate instructions with each other.  It is
+particularly useful when you have multiple instruction formats and need to
+switch between them after instruction selection.  This entire feature is driven
+by relation models which can be defined in ``XXXInstrInfo.td`` files
+according to the target-specific instruction set.  Relation models are defined
+using ``InstrMapping`` class as a base.  TableGen parses all the models
+and generates instruction relation maps using the specified information.
+Relation maps are emitted as tables in the ``XXXGenInstrInfo.inc`` file
+along with the functions to query them.  For the detailed information on how to
+use this feature, please refer to :doc:`HowToUseInstrMappings`.
+
+Implement a subclass of ``TargetInstrInfo``
+-------------------------------------------
+
+The final step is to hand code portions of ``XXXInstrInfo``, which implements
+the interface described in ``TargetInstrInfo.h`` (see :ref:`TargetInstrInfo`).
+These functions return ``0`` or a Boolean or they assert, unless overridden.
+Here's a list of functions that are overridden for the SPARC implementation in
+``SparcInstrInfo.cpp``:
+
+* ``isLoadFromStackSlot`` --- If the specified machine instruction is a direct
+  load from a stack slot, return the register number of the destination and the
+  ``FrameIndex`` of the stack slot.
+
+* ``isStoreToStackSlot`` --- If the specified machine instruction is a direct
+  store to a stack slot, return the register number of the destination and the
+  ``FrameIndex`` of the stack slot.
+
+* ``copyPhysReg`` --- Copy values between a pair of physical registers.
+
+* ``storeRegToStackSlot`` --- Store a register value to a stack slot.
+
+* ``loadRegFromStackSlot`` --- Load a register value from a stack slot.
+
+* ``storeRegToAddr`` --- Store a register value to memory.
+
+* ``loadRegFromAddr`` --- Load a register value from memory.
+
+* ``foldMemoryOperand`` --- Attempt to combine instructions of any load or
+  store instruction for the specified operand(s).
+
+Branch Folding and If Conversion
+--------------------------------
+
+Performance can be improved by combining instructions or by eliminating
+instructions that are never reached.  The ``AnalyzeBranch`` method in
+``XXXInstrInfo`` may be implemented to examine conditional instructions and
+remove unnecessary instructions.  ``AnalyzeBranch`` looks at the end of a
+machine basic block (MBB) for opportunities for improvement, such as branch
+folding and if conversion.  The ``BranchFolder`` and ``IfConverter`` machine
+function passes (see the source files ``BranchFolding.cpp`` and
+``IfConversion.cpp`` in the ``lib/CodeGen`` directory) call ``AnalyzeBranch``
+to improve the control flow graph that represents the instructions.
+
+Several implementations of ``AnalyzeBranch`` (for ARM, Alpha, and X86) can be
+examined as models for your own ``AnalyzeBranch`` implementation.  Since SPARC
+does not implement a useful ``AnalyzeBranch``, the ARM target implementation is
+shown below.
+
+``AnalyzeBranch`` returns a Boolean value and takes four parameters:
+
+* ``MachineBasicBlock &MBB`` --- The incoming block to be examined.
+
+* ``MachineBasicBlock *&TBB`` --- A destination block that is returned.  For a
+  conditional branch that evaluates to true, ``TBB`` is the destination.
+
+* ``MachineBasicBlock *&FBB`` --- For a conditional branch that evaluates to
+  false, ``FBB`` is returned as the destination.
+
+* ``std::vector<MachineOperand> &Cond`` --- List of operands to evaluate a
+  condition for a conditional branch.
+
+In the simplest case, if a block ends without a branch, then it falls through
+to the successor block.  No destination blocks are specified for either ``TBB``
+or ``FBB``, so both parameters return ``NULL``.  The start of the
+``AnalyzeBranch`` (see code below for the ARM target) shows the function
+parameters and the code for the simplest case.
+
+.. code-block:: c++
+
+  bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+                                   MachineBasicBlock *&TBB,
+                                   MachineBasicBlock *&FBB,
+                                   std::vector<MachineOperand> &Cond) const
+  {
+    MachineBasicBlock::iterator I = MBB.end();
+    if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+      return false;
+
+If a block ends with a single unconditional branch instruction, then
+``AnalyzeBranch`` (shown below) should return the destination of that branch in
+the ``TBB`` parameter.
+
+.. code-block:: c++
+
+    if (LastOpc == ARM::B || LastOpc == ARM::tB) {
+      TBB = LastInst->getOperand(0).getMBB();
+      return false;
+    }
+
+If a block ends with two unconditional branches, then the second branch is
+never reached.  In that situation, as shown below, remove the last branch
+instruction and return the penultimate branch in the ``TBB`` parameter.
+
+.. code-block:: c++
+
+    if ((SecondLastOpc == ARM::B || SecondLastOpc == ARM::tB) &&
+        (LastOpc == ARM::B || LastOpc == ARM::tB)) {
+      TBB = SecondLastInst->getOperand(0).getMBB();
+      I = LastInst;
+      I->eraseFromParent();
+      return false;
+    }
+
+A block may end with a single conditional branch instruction that falls through
+to successor block if the condition evaluates to false.  In that case,
+``AnalyzeBranch`` (shown below) should return the destination of that
+conditional branch in the ``TBB`` parameter and a list of operands in the
+``Cond`` parameter to evaluate the condition.
+
+.. code-block:: c++
+
+    if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
+      // Block ends with fall-through condbranch.
+      TBB = LastInst->getOperand(0).getMBB();
+      Cond.push_back(LastInst->getOperand(1));
+      Cond.push_back(LastInst->getOperand(2));
+      return false;
+    }
+
+If a block ends with both a conditional branch and an ensuing unconditional
+branch, then ``AnalyzeBranch`` (shown below) should return the conditional
+branch destination (assuming it corresponds to a conditional evaluation of
+"``true``") in the ``TBB`` parameter and the unconditional branch destination
+in the ``FBB`` (corresponding to a conditional evaluation of "``false``").  A
+list of operands to evaluate the condition should be returned in the ``Cond``
+parameter.
+
+.. code-block:: c++
+
+    unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+    if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
+        (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
+      TBB =  SecondLastInst->getOperand(0).getMBB();
+      Cond.push_back(SecondLastInst->getOperand(1));
+      Cond.push_back(SecondLastInst->getOperand(2));
+      FBB = LastInst->getOperand(0).getMBB();
+      return false;
+    }
+
+For the last two cases (ending with a single conditional branch or ending with
+one conditional and one unconditional branch), the operands returned in the
+``Cond`` parameter can be passed to methods of other instructions to create new
+branches or perform other operations.  An implementation of ``AnalyzeBranch``
+requires the helper methods ``RemoveBranch`` and ``InsertBranch`` to manage
+subsequent operations.
+
+``AnalyzeBranch`` should return false indicating success in most circumstances.
+``AnalyzeBranch`` should only return true when the method is stumped about what
+to do, for example, if a block has three terminating branches.
+``AnalyzeBranch`` may return true if it encounters a terminator it cannot
+handle, such as an indirect branch.
+
+.. _instruction-selector:
+
+Instruction Selector
+====================
+
+LLVM uses a ``SelectionDAG`` to represent LLVM IR instructions, and nodes of
+the ``SelectionDAG`` ideally represent native target instructions.  During code
+generation, instruction selection passes are performed to convert non-native
+DAG instructions into native target-specific instructions.  The pass described
+in ``XXXISelDAGToDAG.cpp`` is used to match patterns and perform DAG-to-DAG
+instruction selection.  Optionally, a pass may be defined (in
+``XXXBranchSelector.cpp``) to perform similar DAG-to-DAG operations for branch
+instructions.  Later, the code in ``XXXISelLowering.cpp`` replaces or removes
+operations and data types not supported natively (legalizes) in a
+``SelectionDAG``.
+
+TableGen generates code for instruction selection using the following target
+description input files:
+
+* ``XXXInstrInfo.td`` --- Contains definitions of instructions in a
+  target-specific instruction set, generates ``XXXGenDAGISel.inc``, which is
+  included in ``XXXISelDAGToDAG.cpp``.
+
+* ``XXXCallingConv.td`` --- Contains the calling and return value conventions
+  for the target architecture, and it generates ``XXXGenCallingConv.inc``,
+  which is included in ``XXXISelLowering.cpp``.
+
+The implementation of an instruction selection pass must include a header that
+declares the ``FunctionPass`` class or a subclass of ``FunctionPass``.  In
+``XXXTargetMachine.cpp``, a Pass Manager (PM) should add each instruction
+selection pass into the queue of passes to run.
+
+The LLVM static compiler (``llc``) is an excellent tool for visualizing the
+contents of DAGs.  To display the ``SelectionDAG`` before or after specific
+processing phases, use the command line options for ``llc``, described at
+:ref:`SelectionDAG-Process`.
+
+To describe instruction selector behavior, you should add patterns for lowering
+LLVM code into a ``SelectionDAG`` as the last parameter of the instruction
+definitions in ``XXXInstrInfo.td``.  For example, in ``SparcInstrInfo.td``,
+this entry defines a register store operation, and the last parameter describes
+a pattern with the store DAG operator.
+
+.. code-block:: llvm
+
+  def STrr  : F3_1< 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src),
+                   "st $src, [$addr]", [(store IntRegs:$src, ADDRrr:$addr)]>;
+
+``ADDRrr`` is a memory mode that is also defined in ``SparcInstrInfo.td``:
+
+.. code-block:: llvm
+
+  def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+
+The definition of ``ADDRrr`` refers to ``SelectADDRrr``, which is a function
+defined in an implementation of the Instructor Selector (such as
+``SparcISelDAGToDAG.cpp``).
+
+In ``lib/Target/TargetSelectionDAG.td``, the DAG operator for store is defined
+below:
+
+.. code-block:: llvm
+
+  def store : PatFrag<(ops node:$val, node:$ptr),
+                      (st node:$val, node:$ptr), [{
+    if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+      return !ST->isTruncatingStore() &&
+             ST->getAddressingMode() == ISD::UNINDEXED;
+    return false;
+  }]>;
+
+``XXXInstrInfo.td`` also generates (in ``XXXGenDAGISel.inc``) the
+``SelectCode`` method that is used to call the appropriate processing method
+for an instruction.  In this example, ``SelectCode`` calls ``Select_ISD_STORE``
+for the ``ISD::STORE`` opcode.
+
+.. code-block:: c++
+
+  SDNode *SelectCode(SDValue N) {
+    ...
+    MVT::ValueType NVT = N.getNode()->getValueType(0);
+    switch (N.getOpcode()) {
+    case ISD::STORE: {
+      switch (NVT) {
+      default:
+        return Select_ISD_STORE(N);
+        break;
+      }
+      break;
+    }
+    ...
+
+The pattern for ``STrr`` is matched, so elsewhere in ``XXXGenDAGISel.inc``,
+code for ``STrr`` is created for ``Select_ISD_STORE``.  The ``Emit_22`` method
+is also generated in ``XXXGenDAGISel.inc`` to complete the processing of this
+instruction.
+
+.. code-block:: c++
+
+  SDNode *Select_ISD_STORE(const SDValue &N) {
+    SDValue Chain = N.getOperand(0);
+    if (Predicate_store(N.getNode())) {
+      SDValue N1 = N.getOperand(1);
+      SDValue N2 = N.getOperand(2);
+      SDValue CPTmp0;
+      SDValue CPTmp1;
+
+      // Pattern: (st:void IntRegs:i32:$src,
+      //           ADDRrr:i32:$addr)<<P:Predicate_store>>
+      // Emits: (STrr:void ADDRrr:i32:$addr, IntRegs:i32:$src)
+      // Pattern complexity = 13  cost = 1  size = 0
+      if (SelectADDRrr(N, N2, CPTmp0, CPTmp1) &&
+          N1.getNode()->getValueType(0) == MVT::i32 &&
+          N2.getNode()->getValueType(0) == MVT::i32) {
+        return Emit_22(N, SP::STrr, CPTmp0, CPTmp1);
+      }
+  ...
+
+The SelectionDAG Legalize Phase
+-------------------------------
+
+The Legalize phase converts a DAG to use types and operations that are natively
+supported by the target.  For natively unsupported types and operations, you
+need to add code to the target-specific ``XXXTargetLowering`` implementation to
+convert unsupported types and operations to supported ones.
+
+In the constructor for the ``XXXTargetLowering`` class, first use the
+``addRegisterClass`` method to specify which types are supported and which
+register classes are associated with them.  The code for the register classes
+are generated by TableGen from ``XXXRegisterInfo.td`` and placed in
+``XXXGenRegisterInfo.h.inc``.  For example, the implementation of the
+constructor for the SparcTargetLowering class (in ``SparcISelLowering.cpp``)
+starts with the following code:
+
+.. code-block:: c++
+
+  addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
+  addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
+  addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass);
+
+You should examine the node types in the ``ISD`` namespace
+(``include/llvm/CodeGen/SelectionDAGNodes.h``) and determine which operations
+the target natively supports.  For operations that do **not** have native
+support, add a callback to the constructor for the ``XXXTargetLowering`` class,
+so the instruction selection process knows what to do.  The ``TargetLowering``
+class callback methods (declared in ``llvm/Target/TargetLowering.h``) are:
+
+* ``setOperationAction`` --- General operation.
+* ``setLoadExtAction`` --- Load with extension.
+* ``setTruncStoreAction`` --- Truncating store.
+* ``setIndexedLoadAction`` --- Indexed load.
+* ``setIndexedStoreAction`` --- Indexed store.
+* ``setConvertAction`` --- Type conversion.
+* ``setCondCodeAction`` --- Support for a given condition code.
+
+Note: on older releases, ``setLoadXAction`` is used instead of
+``setLoadExtAction``.  Also, on older releases, ``setCondCodeAction`` may not
+be supported.  Examine your release to see what methods are specifically
+supported.
+
+These callbacks are used to determine that an operation does or does not work
+with a specified type (or types).  And in all cases, the third parameter is a
+``LegalAction`` type enum value: ``Promote``, ``Expand``, ``Custom``, or
+``Legal``.  ``SparcISelLowering.cpp`` contains examples of all four
+``LegalAction`` values.
+
+Promote
+^^^^^^^
+
+For an operation without native support for a given type, the specified type
+may be promoted to a larger type that is supported.  For example, SPARC does
+not support a sign-extending load for Boolean values (``i1`` type), so in
+``SparcISelLowering.cpp`` the third parameter below, ``Promote``, changes
+``i1`` type values to a large type before loading.
+
+.. code-block:: c++
+
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+Expand
+^^^^^^
+
+For a type without native support, a value may need to be broken down further,
+rather than promoted.  For an operation without native support, a combination
+of other operations may be used to similar effect.  In SPARC, the
+floating-point sine and cosine trig operations are supported by expansion to
+other operations, as indicated by the third parameter, ``Expand``, to
+``setOperationAction``:
+
+.. code-block:: c++
+
+  setOperationAction(ISD::FSIN, MVT::f32, Expand);
+  setOperationAction(ISD::FCOS, MVT::f32, Expand);
+
+Custom
+^^^^^^
+
+For some operations, simple type promotion or operation expansion may be
+insufficient.  In some cases, a special intrinsic function must be implemented.
+
+For example, a constant value may require special treatment, or an operation
+may require spilling and restoring registers in the stack and working with
+register allocators.
+
+As seen in ``SparcISelLowering.cpp`` code below, to perform a type conversion
+from a floating point value to a signed integer, first the
+``setOperationAction`` should be called with ``Custom`` as the third parameter:
+
+.. code-block:: c++
+
+  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+In the ``LowerOperation`` method, for each ``Custom`` operation, a case
+statement should be added to indicate what function to call.  In the following
+code, an ``FP_TO_SINT`` opcode will call the ``LowerFP_TO_SINT`` method:
+
+.. code-block:: c++
+
+  SDValue SparcTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+    switch (Op.getOpcode()) {
+    case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+    ...
+    }
+  }
+
+Finally, the ``LowerFP_TO_SINT`` method is implemented, using an FP register to
+convert the floating-point value to an integer.
+
+.. code-block:: c++
+
+  static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+    assert(Op.getValueType() == MVT::i32);
+    Op = DAG.getNode(SPISD::FTOI, MVT::f32, Op.getOperand(0));
+    return DAG.getNode(ISD::BITCAST, MVT::i32, Op);
+  }
+
+Legal
+^^^^^
+
+The ``Legal`` ``LegalizeAction`` enum value simply indicates that an operation
+**is** natively supported.  ``Legal`` represents the default condition, so it
+is rarely used.  In ``SparcISelLowering.cpp``, the action for ``CTPOP`` (an
+operation to count the bits set in an integer) is natively supported only for
+SPARC v9.  The following code enables the ``Expand`` conversion technique for
+non-v9 SPARC implementations.
+
+.. code-block:: c++
+
+  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+  ...
+  if (TM.getSubtarget<SparcSubtarget>().isV9())
+    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+
+Calling Conventions
+-------------------
+
+To support target-specific calling conventions, ``XXXGenCallingConv.td`` uses
+interfaces (such as ``CCIfType`` and ``CCAssignToReg``) that are defined in
+``lib/Target/TargetCallingConv.td``.  TableGen can take the target descriptor
+file ``XXXGenCallingConv.td`` and generate the header file
+``XXXGenCallingConv.inc``, which is typically included in
+``XXXISelLowering.cpp``.  You can use the interfaces in
+``TargetCallingConv.td`` to specify:
+
+* The order of parameter allocation.
+
+* Where parameters and return values are placed (that is, on the stack or in
+  registers).
+
+* Which registers may be used.
+
+* Whether the caller or callee unwinds the stack.
+
+The following example demonstrates the use of the ``CCIfType`` and
+``CCAssignToReg`` interfaces.  If the ``CCIfType`` predicate is true (that is,
+if the current argument is of type ``f32`` or ``f64``), then the action is
+performed.  In this case, the ``CCAssignToReg`` action assigns the argument
+value to the first available register: either ``R0`` or ``R1``.
+
+.. code-block:: llvm
+
+  CCIfType<[f32,f64], CCAssignToReg<[R0, R1]>>
+
+``SparcCallingConv.td`` contains definitions for a target-specific return-value
+calling convention (``RetCC_Sparc32``) and a basic 32-bit C calling convention
+(``CC_Sparc32``).  The definition of ``RetCC_Sparc32`` (shown below) indicates
+which registers are used for specified scalar return types.  A single-precision
+float is returned to register ``F0``, and a double-precision float goes to
+register ``D0``.  A 32-bit integer is returned in register ``I0`` or ``I1``.
+
+.. code-block:: llvm
+
+  def RetCC_Sparc32 : CallingConv<[
+    CCIfType<[i32], CCAssignToReg<[I0, I1]>>,
+    CCIfType<[f32], CCAssignToReg<[F0]>>,
+    CCIfType<[f64], CCAssignToReg<[D0]>>
+  ]>;
+
+The definition of ``CC_Sparc32`` in ``SparcCallingConv.td`` introduces
+``CCAssignToStack``, which assigns the value to a stack slot with the specified
+size and alignment.  In the example below, the first parameter, 4, indicates
+the size of the slot, and the second parameter, also 4, indicates the stack
+alignment along 4-byte units.  (Special cases: if size is zero, then the ABI
+size is used; if alignment is zero, then the ABI alignment is used.)
+
+.. code-block:: llvm
+
+  def CC_Sparc32 : CallingConv<[
+    // All arguments get passed in integer registers if there is space.
+    CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+    CCAssignToStack<4, 4>
+  ]>;
+
+``CCDelegateTo`` is another commonly used interface, which tries to find a
+specified sub-calling convention, and, if a match is found, it is invoked.  In
+the following example (in ``X86CallingConv.td``), the definition of
+``RetCC_X86_32_C`` ends with ``CCDelegateTo``.  After the current value is
+assigned to the register ``ST0`` or ``ST1``, the ``RetCC_X86Common`` is
+invoked.
+
+.. code-block:: llvm
+
+  def RetCC_X86_32_C : CallingConv<[
+    CCIfType<[f32], CCAssignToReg<[ST0, ST1]>>,
+    CCIfType<[f64], CCAssignToReg<[ST0, ST1]>>,
+    CCDelegateTo<RetCC_X86Common>
+  ]>;
+
+``CCIfCC`` is an interface that attempts to match the given name to the current
+calling convention.  If the name identifies the current calling convention,
+then a specified action is invoked.  In the following example (in
+``X86CallingConv.td``), if the ``Fast`` calling convention is in use, then
+``RetCC_X86_32_Fast`` is invoked.  If the ``SSECall`` calling convention is in
+use, then ``RetCC_X86_32_SSE`` is invoked.
+
+.. code-block:: llvm
+
+  def RetCC_X86_32 : CallingConv<[
+    CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+    CCIfCC<"CallingConv::X86_SSECall", CCDelegateTo<RetCC_X86_32_SSE>>,
+    CCDelegateTo<RetCC_X86_32_C>
+  ]>;
+
+Other calling convention interfaces include:
+
+* ``CCIf <predicate, action>`` --- If the predicate matches, apply the action.
+
+* ``CCIfInReg <action>`` --- If the argument is marked with the "``inreg``"
+  attribute, then apply the action.
+
+* ``CCIfNest <action>`` --- If the argument is marked with the "``nest``"
+  attribute, then apply the action.
+
+* ``CCIfNotVarArg <action>`` --- If the current function does not take a
+  variable number of arguments, apply the action.
+
+* ``CCAssignToRegWithShadow <registerList, shadowList>`` --- similar to
+  ``CCAssignToReg``, but with a shadow list of registers.
+
+* ``CCPassByVal <size, align>`` --- Assign value to a stack slot with the
+  minimum specified size and alignment.
+
+* ``CCPromoteToType <type>`` --- Promote the current value to the specified
+  type.
+
+* ``CallingConv <[actions]>`` --- Define each calling convention that is
+  supported.
+
+Assembly Printer
+================
+
+During the code emission stage, the code generator may utilize an LLVM pass to
+produce assembly output.  To do this, you want to implement the code for a
+printer that converts LLVM IR to a GAS-format assembly language for your target
+machine, using the following steps:
+
+* Define all the assembly strings for your target, adding them to the
+  instructions defined in the ``XXXInstrInfo.td`` file.  (See
+  :ref:`instruction-set`.)  TableGen will produce an output file
+  (``XXXGenAsmWriter.inc``) with an implementation of the ``printInstruction``
+  method for the ``XXXAsmPrinter`` class.
+
+* Write ``XXXTargetAsmInfo.h``, which contains the bare-bones declaration of
+  the ``XXXTargetAsmInfo`` class (a subclass of ``TargetAsmInfo``).
+
+* Write ``XXXTargetAsmInfo.cpp``, which contains target-specific values for
+  ``TargetAsmInfo`` properties and sometimes new implementations for methods.
+
+* Write ``XXXAsmPrinter.cpp``, which implements the ``AsmPrinter`` class that
+  performs the LLVM-to-assembly conversion.
+
+The code in ``XXXTargetAsmInfo.h`` is usually a trivial declaration of the
+``XXXTargetAsmInfo`` class for use in ``XXXTargetAsmInfo.cpp``.  Similarly,
+``XXXTargetAsmInfo.cpp`` usually has a few declarations of ``XXXTargetAsmInfo``
+replacement values that override the default values in ``TargetAsmInfo.cpp``.
+For example in ``SparcTargetAsmInfo.cpp``:
+
+.. code-block:: c++
+
+  SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &TM) {
+    Data16bitsDirective = "\t.half\t";
+    Data32bitsDirective = "\t.word\t";
+    Data64bitsDirective = 0;  // .xword is only supported by V9.
+    ZeroDirective = "\t.skip\t";
+    CommentString = "!";
+    ConstantPoolSection = "\t.section \".rodata\",#alloc\n";
+  }
+
+The X86 assembly printer implementation (``X86TargetAsmInfo``) is an example
+where the target specific ``TargetAsmInfo`` class uses an overridden methods:
+``ExpandInlineAsm``.
+
+A target-specific implementation of ``AsmPrinter`` is written in
+``XXXAsmPrinter.cpp``, which implements the ``AsmPrinter`` class that converts
+the LLVM to printable assembly.  The implementation must include the following
+headers that have declarations for the ``AsmPrinter`` and
+``MachineFunctionPass`` classes.  The ``MachineFunctionPass`` is a subclass of
+``FunctionPass``.
+
+.. code-block:: c++
+
+  #include "llvm/CodeGen/AsmPrinter.h"
+  #include "llvm/CodeGen/MachineFunctionPass.h"
+
+As a ``FunctionPass``, ``AsmPrinter`` first calls ``doInitialization`` to set
+up the ``AsmPrinter``.  In ``SparcAsmPrinter``, a ``Mangler`` object is
+instantiated to process variable names.
+
+In ``XXXAsmPrinter.cpp``, the ``runOnMachineFunction`` method (declared in
+``MachineFunctionPass``) must be implemented for ``XXXAsmPrinter``.  In
+``MachineFunctionPass``, the ``runOnFunction`` method invokes
+``runOnMachineFunction``.  Target-specific implementations of
+``runOnMachineFunction`` differ, but generally do the following to process each
+machine function:
+
+* Call ``SetupMachineFunction`` to perform initialization.
+
+* Call ``EmitConstantPool`` to print out (to the output stream) constants which
+  have been spilled to memory.
+
+* Call ``EmitJumpTableInfo`` to print out jump tables used by the current
+  function.
+
+* Print out the label for the current function.
+
+* Print out the code for the function, including basic block labels and the
+  assembly for the instruction (using ``printInstruction``)
+
+The ``XXXAsmPrinter`` implementation must also include the code generated by
+TableGen that is output in the ``XXXGenAsmWriter.inc`` file.  The code in
+``XXXGenAsmWriter.inc`` contains an implementation of the ``printInstruction``
+method that may call these methods:
+
+* ``printOperand``
+* ``printMemOperand``
+* ``printCCOperand`` (for conditional statements)
+* ``printDataDirective``
+* ``printDeclare``
+* ``printImplicitDef``
+* ``printInlineAsm``
+
+The implementations of ``printDeclare``, ``printImplicitDef``,
+``printInlineAsm``, and ``printLabel`` in ``AsmPrinter.cpp`` are generally
+adequate for printing assembly and do not need to be overridden.
+
+The ``printOperand`` method is implemented with a long ``switch``/``case``
+statement for the type of operand: register, immediate, basic block, external
+symbol, global address, constant pool index, or jump table index.  For an
+instruction with a memory address operand, the ``printMemOperand`` method
+should be implemented to generate the proper output.  Similarly,
+``printCCOperand`` should be used to print a conditional operand.
+
+``doFinalization`` should be overridden in ``XXXAsmPrinter``, and it should be
+called to shut down the assembly printer.  During ``doFinalization``, global
+variables and constants are printed to output.
+
+Subtarget Support
+=================
+
+Subtarget support is used to inform the code generation process of instruction
+set variations for a given chip set.  For example, the LLVM SPARC
+implementation provided covers three major versions of the SPARC microprocessor
+architecture: Version 8 (V8, which is a 32-bit architecture), Version 9 (V9, a
+64-bit architecture), and the UltraSPARC architecture.  V8 has 16
+double-precision floating-point registers that are also usable as either 32
+single-precision or 8 quad-precision registers.  V8 is also purely big-endian.
+V9 has 32 double-precision floating-point registers that are also usable as 16
+quad-precision registers, but cannot be used as single-precision registers.
+The UltraSPARC architecture combines V9 with UltraSPARC Visual Instruction Set
+extensions.
+
+If subtarget support is needed, you should implement a target-specific
+``XXXSubtarget`` class for your architecture.  This class should process the
+command-line options ``-mcpu=`` and ``-mattr=``.
+
+TableGen uses definitions in the ``Target.td`` and ``Sparc.td`` files to
+generate code in ``SparcGenSubtarget.inc``.  In ``Target.td``, shown below, the
+``SubtargetFeature`` interface is defined.  The first 4 string parameters of
+the ``SubtargetFeature`` interface are a feature name, an attribute set by the
+feature, the value of the attribute, and a description of the feature.  (The
+fifth parameter is a list of features whose presence is implied, and its
+default value is an empty array.)
+
+.. code-block:: llvm
+
+  class SubtargetFeature<string n, string a, string v, string d,
+                         list<SubtargetFeature> i = []> {
+    string Name = n;
+    string Attribute = a;
+    string Value = v;
+    string Desc = d;
+    list<SubtargetFeature> Implies = i;
+  }
+
+In the ``Sparc.td`` file, the ``SubtargetFeature`` is used to define the
+following features.
+
+.. code-block:: llvm
+
+  def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true",
+                       "Enable SPARC-V9 instructions">;
+  def FeatureV8Deprecated : SubtargetFeature<"deprecated-v8",
+                       "V8DeprecatedInsts", "true",
+                       "Enable deprecated V8 instructions in V9 mode">;
+  def FeatureVIS : SubtargetFeature<"vis", "IsVIS", "true",
+                       "Enable UltraSPARC Visual Instruction Set extensions">;
+
+Elsewhere in ``Sparc.td``, the ``Proc`` class is defined and then is used to
+define particular SPARC processor subtypes that may have the previously
+described features.
+
+.. code-block:: llvm
+
+  class Proc<string Name, list<SubtargetFeature> Features>
+    : Processor<Name, NoItineraries, Features>;
+
+  def : Proc<"generic",         []>;
+  def : Proc<"v8",              []>;
+  def : Proc<"supersparc",      []>;
+  def : Proc<"sparclite",       []>;
+  def : Proc<"f934",            []>;
+  def : Proc<"hypersparc",      []>;
+  def : Proc<"sparclite86x",    []>;
+  def : Proc<"sparclet",        []>;
+  def : Proc<"tsc701",          []>;
+  def : Proc<"v9",              [FeatureV9]>;
+  def : Proc<"ultrasparc",      [FeatureV9, FeatureV8Deprecated]>;
+  def : Proc<"ultrasparc3",     [FeatureV9, FeatureV8Deprecated]>;
+  def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+
+From ``Target.td`` and ``Sparc.td`` files, the resulting
+``SparcGenSubtarget.inc`` specifies enum values to identify the features,
+arrays of constants to represent the CPU features and CPU subtypes, and the
+``ParseSubtargetFeatures`` method that parses the features string that sets
+specified subtarget options.  The generated ``SparcGenSubtarget.inc`` file
+should be included in the ``SparcSubtarget.cpp``.  The target-specific
+implementation of the ``XXXSubtarget`` method should follow this pseudocode:
+
+.. code-block:: c++
+
+  XXXSubtarget::XXXSubtarget(const Module &M, const std::string &FS) {
+    // Set the default features
+    // Determine default and user specified characteristics of the CPU
+    // Call ParseSubtargetFeatures(FS, CPU) to parse the features string
+    // Perform any additional operations
+  }
+
+JIT Support
+===========
+
+The implementation of a target machine optionally includes a Just-In-Time (JIT)
+code generator that emits machine code and auxiliary structures as binary
+output that can be written directly to memory.  To do this, implement JIT code
+generation by performing the following steps:
+
+* Write an ``XXXCodeEmitter.cpp`` file that contains a machine function pass
+  that transforms target-machine instructions into relocatable machine
+  code.
+
+* Write an ``XXXJITInfo.cpp`` file that implements the JIT interfaces for
+  target-specific code-generation activities, such as emitting machine code and
+  stubs.
+
+* Modify ``XXXTargetMachine`` so that it provides a ``TargetJITInfo`` object
+  through its ``getJITInfo`` method.
+
+There are several different approaches to writing the JIT support code.  For
+instance, TableGen and target descriptor files may be used for creating a JIT
+code generator, but are not mandatory.  For the Alpha and PowerPC target
+machines, TableGen is used to generate ``XXXGenCodeEmitter.inc``, which
+contains the binary coding of machine instructions and the
+``getBinaryCodeForInstr`` method to access those codes.  Other JIT
+implementations do not.
+
+Both ``XXXJITInfo.cpp`` and ``XXXCodeEmitter.cpp`` must include the
+``llvm/CodeGen/MachineCodeEmitter.h`` header file that defines the
+``MachineCodeEmitter`` class containing code for several callback functions
+that write data (in bytes, words, strings, etc.) to the output stream.
+
+Machine Code Emitter
+--------------------
+
+In ``XXXCodeEmitter.cpp``, a target-specific of the ``Emitter`` class is
+implemented as a function pass (subclass of ``MachineFunctionPass``).  The
+target-specific implementation of ``runOnMachineFunction`` (invoked by
+``runOnFunction`` in ``MachineFunctionPass``) iterates through the
+``MachineBasicBlock`` calls ``emitInstruction`` to process each instruction and
+emit binary code.  ``emitInstruction`` is largely implemented with case
+statements on the instruction types defined in ``XXXInstrInfo.h``.  For
+example, in ``X86CodeEmitter.cpp``, the ``emitInstruction`` method is built
+around the following ``switch``/``case`` statements:
+
+.. code-block:: c++
+
+  switch (Desc->TSFlags & X86::FormMask) {
+  case X86II::Pseudo:  // for not yet implemented instructions
+     ...               // or pseudo-instructions
+     break;
+  case X86II::RawFrm:  // for instructions with a fixed opcode value
+     ...
+     break;
+  case X86II::AddRegFrm: // for instructions that have one register operand
+     ...                 // added to their opcode
+     break;
+  case X86II::MRMDestReg:// for instructions that use the Mod/RM byte
+     ...                 // to specify a destination (register)
+     break;
+  case X86II::MRMDestMem:// for instructions that use the Mod/RM byte
+     ...                 // to specify a destination (memory)
+     break;
+  case X86II::MRMSrcReg: // for instructions that use the Mod/RM byte
+     ...                 // to specify a source (register)
+     break;
+  case X86II::MRMSrcMem: // for instructions that use the Mod/RM byte
+     ...                 // to specify a source (memory)
+     break;
+  case X86II::MRM0r: case X86II::MRM1r:  // for instructions that operate on
+  case X86II::MRM2r: case X86II::MRM3r:  // a REGISTER r/m operand and
+  case X86II::MRM4r: case X86II::MRM5r:  // use the Mod/RM byte and a field
+  case X86II::MRM6r: case X86II::MRM7r:  // to hold extended opcode data
+     ...
+     break;
+  case X86II::MRM0m: case X86II::MRM1m:  // for instructions that operate on
+  case X86II::MRM2m: case X86II::MRM3m:  // a MEMORY r/m operand and
+  case X86II::MRM4m: case X86II::MRM5m:  // use the Mod/RM byte and a field
+  case X86II::MRM6m: case X86II::MRM7m:  // to hold extended opcode data
+     ...
+     break;
+  case X86II::MRMInitReg: // for instructions whose source and
+     ...                  // destination are the same register
+     break;
+  }
+
+The implementations of these case statements often first emit the opcode and
+then get the operand(s).  Then depending upon the operand, helper methods may
+be called to process the operand(s).  For example, in ``X86CodeEmitter.cpp``,
+for the ``X86II::AddRegFrm`` case, the first data emitted (by ``emitByte``) is
+the opcode added to the register operand.  Then an object representing the
+machine operand, ``MO1``, is extracted.  The helper methods such as
+``isImmediate``, ``isGlobalAddress``, ``isExternalSymbol``,
+``isConstantPoolIndex``, and ``isJumpTableIndex`` determine the operand type.
+(``X86CodeEmitter.cpp`` also has private methods such as ``emitConstant``,
+``emitGlobalAddress``, ``emitExternalSymbolAddress``, ``emitConstPoolAddress``,
+and ``emitJumpTableAddress`` that emit the data into the output stream.)
+
+.. code-block:: c++
+
+  case X86II::AddRegFrm:
+    MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
+
+    if (CurOp != NumOps) {
+      const MachineOperand &MO1 = MI.getOperand(CurOp++);
+      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+      if (MO1.isImmediate())
+        emitConstant(MO1.getImm(), Size);
+      else {
+        unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+          : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+        if (Opcode == X86::MOV64ri)
+          rt = X86::reloc_absolute_dword;  // FIXME: add X86II flag?
+        if (MO1.isGlobalAddress()) {
+          bool NeedStub = isa<Function>(MO1.getGlobal());
+          bool isLazy = gvNeedsLazyPtr(MO1.getGlobal());
+          emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+                            NeedStub, isLazy);
+        } else if (MO1.isExternalSymbol())
+          emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+        else if (MO1.isConstantPoolIndex())
+          emitConstPoolAddress(MO1.getIndex(), rt);
+        else if (MO1.isJumpTableIndex())
+          emitJumpTableAddress(MO1.getIndex(), rt);
+      }
+    }
+    break;
+
+In the previous example, ``XXXCodeEmitter.cpp`` uses the variable ``rt``, which
+is a ``RelocationType`` enum that may be used to relocate addresses (for
+example, a global address with a PIC base offset).  The ``RelocationType`` enum
+for that target is defined in the short target-specific ``XXXRelocations.h``
+file.  The ``RelocationType`` is used by the ``relocate`` method defined in
+``XXXJITInfo.cpp`` to rewrite addresses for referenced global symbols.
+
+For example, ``X86Relocations.h`` specifies the following relocation types for
+the X86 addresses.  In all four cases, the relocated value is added to the
+value already in memory.  For ``reloc_pcrel_word`` and ``reloc_picrel_word``,
+there is an additional initial adjustment.
+
+.. code-block:: c++
+
+  enum RelocationType {
+    reloc_pcrel_word = 0,    // add reloc value after adjusting for the PC loc
+    reloc_picrel_word = 1,   // add reloc value after adjusting for the PIC base
+    reloc_absolute_word = 2, // absolute relocation; no additional adjustment
+    reloc_absolute_dword = 3 // absolute relocation; no additional adjustment
+  };
+
+Target JIT Info
+---------------
+
+``XXXJITInfo.cpp`` implements the JIT interfaces for target-specific
+code-generation activities, such as emitting machine code and stubs.  At
+minimum, a target-specific version of ``XXXJITInfo`` implements the following:
+
+* ``getLazyResolverFunction`` --- Initializes the JIT, gives the target a
+  function that is used for compilation.
+
+* ``emitFunctionStub`` --- Returns a native function with a specified address
+  for a callback function.
+
+* ``relocate`` --- Changes the addresses of referenced globals, based on
+  relocation types.
+
+* Callback function that are wrappers to a function stub that is used when the
+  real target is not initially known.
+
+``getLazyResolverFunction`` is generally trivial to implement.  It makes the
+incoming parameter as the global ``JITCompilerFunction`` and returns the
+callback function that will be used a function wrapper.  For the Alpha target
+(in ``AlphaJITInfo.cpp``), the ``getLazyResolverFunction`` implementation is
+simply:
+
+.. code-block:: c++
+
+  TargetJITInfo::LazyResolverFn AlphaJITInfo::getLazyResolverFunction(
+                                              JITCompilerFn F) {
+    JITCompilerFunction = F;
+    return AlphaCompilationCallback;
+  }
+
+For the X86 target, the ``getLazyResolverFunction`` implementation is a little
+more complicated, because it returns a different callback function for
+processors with SSE instructions and XMM registers.
+
+The callback function initially saves and later restores the callee register
+values, incoming arguments, and frame and return address.  The callback
+function needs low-level access to the registers or stack, so it is typically
+implemented with assembler.
+
diff --git a/docs/design_and_overview.rst b/docs/design_and_overview.rst
index ea684155e0..22e8125bb6 100644
--- a/docs/design_and_overview.rst
+++ b/docs/design_and_overview.rst
@@ -6,9 +6,10 @@ LLVM Design & Overview
 .. toctree::
    :hidden:
 
+   LangRef
    GetElementPtr
 
-* `LLVM Language Reference Manual <LangRef.html>`_
+* :doc:`LangRef`
 
   Defines the LLVM intermediate representation.
 
diff --git a/docs/development_process.rst b/docs/development_process.rst
index 74324b98a6..ecd4c6a616 100644
--- a/docs/development_process.rst
+++ b/docs/development_process.rst
@@ -9,6 +9,7 @@ Development Process Documentation
    MakefileGuide
    Projects
    LLVMBuild
+   HowToReleaseLLVM
 
 * :ref:`projects`
 
@@ -26,6 +27,6 @@ Development Process Documentation
 
   Describes how the LLVM makefiles work and how to use them.
 
-* `How To Release LLVM To The Public <HowToReleaseLLVM.html>`_
+* :doc:`HowToReleaseLLVM`
 
   This is a guide to preparing LLVM releases. Most developers can ignore it.
diff --git a/docs/programming.rst b/docs/programming.rst
index c4eec59417..3fea6ed427 100644
--- a/docs/programming.rst
+++ b/docs/programming.rst
@@ -12,6 +12,7 @@ Programming Documentation
    CompilerWriterInfo
    ExtendingLLVM
    HowToSetUpLLVMStyleRTTI
+   ProgrammersManual
 
 * `LLVM Language Reference Manual <LangRef.html>`_
 
@@ -22,7 +23,7 @@ Programming Documentation
 
   Information about LLVM's concurrency model.
 
-* `The LLVM Programmers Manual <ProgrammersManual.html>`_
+* :doc:`ProgrammersManual`
 
   Introduction to the general layout of the LLVM sourcebase, important classes
   and APIs, and some tips & tricks.
diff --git a/docs/subsystems.rst b/docs/subsystems.rst
index 35d7b8111d..275955be6e 100644
--- a/docs/subsystems.rst
+++ b/docs/subsystems.rst
@@ -21,6 +21,8 @@ Subsystem Documentation
    HowToUseInstrMappings
    SystemLibrary
    SourceLevelDebugging
+   WritingAnLLVMBackend
+   GarbageCollection
 
 .. FIXME: once LangRef is Sphinxified, HowToUseInstrMappings should be put
    under LangRef's toctree instead of this page's toctree.
@@ -29,8 +31,8 @@ Subsystem Documentation
     
    Information on how to write LLVM transformations and analyses.
     
-* `Writing an LLVM Backend <WritingAnLLVMBackend.html>`_
-    
+* :doc:`WritingAnLLVMBackend`
+
    Information on how to write LLVM backends for machine targets.
 
 * :ref:`code_generator`
@@ -48,9 +50,9 @@ Subsystem Documentation
     
    Information on how to write a new alias analysis implementation or how to
    use existing analyses.
-    
-* `Accurate Garbage Collection with LLVM <GarbageCollection.html>`_
-    
+
+* :doc:`GarbageCollection`
+
    The interfaces source-language compilers should use for compiling GC'd
    programs.
 
diff --git a/docs/tutorial/LangImpl1.html b/docs/tutorial/LangImpl1.html
deleted file mode 100644
index a65646f286..0000000000
--- a/docs/tutorial/LangImpl1.html
+++ /dev/null
@@ -1,348 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Tutorial Introduction and the Lexer</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Tutorial Introduction and the Lexer</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 1
-  <ol>
-    <li><a href="#intro">Tutorial Introduction</a></li>
-    <li><a href="#language">The Basic Language</a></li>
-    <li><a href="#lexer">The Lexer</a></li>
-  </ol>
-</li>
-<li><a href="LangImpl2.html">Chapter 2</a>: Implementing a Parser and AST</li>
-</ul>
-
-<div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Tutorial Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to the "Implementing a language with LLVM" tutorial.  This tutorial
-runs through the implementation of a simple language, showing how fun and
-easy it can be.  This tutorial will get you up and started as well as help to
-build a framework you can extend to other languages.  The code in this tutorial
-can also be used as a playground to hack on other LLVM specific things.
-</p>
-
-<p>
-The goal of this tutorial is to progressively unveil our language, describing
-how it is built up over time.  This will let us cover a fairly broad range of
-language design and LLVM-specific usage issues, showing and explaining the code
-for it all along the way, without overwhelming you with tons of details up
-front.</p>
-
-<p>It is useful to point out ahead of time that this tutorial is really about
-teaching compiler techniques and LLVM specifically, <em>not</em> about teaching
-modern and sane software engineering principles.  In practice, this means that
-we'll take a number of shortcuts to simplify the exposition.  For example, the
-code leaks memory, uses global variables all over the place, doesn't use nice
-design patterns like <a
-href="http://en.wikipedia.org/wiki/Visitor_pattern">visitors</a>, etc... but it
-is very simple.  If you dig in and use the code as a basis for future projects,
-fixing these deficiencies shouldn't be hard.</p>
-
-<p>I've tried to put this tutorial together in a way that makes chapters easy to
-skip over if you are already familiar with or are uninterested in the various
-pieces.  The structure of the tutorial is:
-</p>
-
-<ul>
-<li><b><a href="#language">Chapter #1</a>: Introduction to the Kaleidoscope
-language, and the definition of its Lexer</b> - This shows where we are going
-and the basic functionality that we want it to do.  In order to make this
-tutorial maximally understandable and hackable, we choose to implement 
-everything in C++ instead of using lexer and parser generators.  LLVM obviously
-works just fine with such tools, feel free to use one if you prefer.</li>
-<li><b><a href="LangImpl2.html">Chapter #2</a>: Implementing a Parser and
-AST</b> - With the lexer in place, we can talk about parsing techniques and
-basic AST construction.  This tutorial describes recursive descent parsing and
-operator precedence parsing.  Nothing in Chapters 1 or 2 is LLVM-specific,
-the code doesn't even link in LLVM at this point. :)</li>
-<li><b><a href="LangImpl3.html">Chapter #3</a>: Code generation to LLVM IR</b> -
-With the AST ready, we can show off how easy generation of LLVM IR really 
-is.</li>
-<li><b><a href="LangImpl4.html">Chapter #4</a>: Adding JIT and Optimizer
-Support</b> - Because a lot of people are interested in using LLVM as a JIT,
-we'll dive right into it and show you the 3 lines it takes to add JIT support.
-LLVM is also useful in many other ways, but this is one simple and "sexy" way
-to shows off its power. :)</li>
-<li><b><a href="LangImpl5.html">Chapter #5</a>: Extending the Language: Control
-Flow</b> - With the language up and running, we show how to extend it with
-control flow operations (if/then/else and a 'for' loop).  This gives us a chance
-to talk about simple SSA construction and control flow.</li>
-<li><b><a href="LangImpl6.html">Chapter #6</a>: Extending the Language: 
-User-defined Operators</b> - This is a silly but fun chapter that talks about
-extending the language to let the user program define their own arbitrary
-unary and binary operators (with assignable precedence!).  This lets us build a
-significant piece of the "language" as library routines.</li>
-<li><b><a href="LangImpl7.html">Chapter #7</a>: Extending the Language: Mutable
-Variables</b> - This chapter talks about adding user-defined local variables
-along with an assignment operator.  The interesting part about this is how
-easy and trivial it is to construct SSA form in LLVM: no, LLVM does <em>not</em>
-require your front-end to construct SSA form!</li>
-<li><b><a href="LangImpl8.html">Chapter #8</a>: Conclusion and other useful LLVM
-tidbits</b> - This chapter wraps up the series by talking about potential
-ways to extend the language, but also includes a bunch of pointers to info about
-"special topics" like adding garbage collection support, exceptions, debugging,
-support for "spaghetti stacks", and a bunch of other tips and tricks.</li>
-
-</ul>
-
-<p>By the end of the tutorial, we'll have written a bit less than 700 lines of 
-non-comment, non-blank, lines of code.  With this small amount of code, we'll
-have built up a very reasonable compiler for a non-trivial language including
-a hand-written lexer, parser, AST, as well as code generation support with a JIT
-compiler.  While other systems may have interesting "hello world" tutorials,
-I think the breadth of this tutorial is a great testament to the strengths of
-LLVM and why you should consider it if you're interested in language or compiler
-design.</p>
-
-<p>A note about this tutorial: we expect you to extend the language and play
-with it on your own.  Take the code and go crazy hacking away at it, compilers
-don't need to be scary creatures - it can be a lot of fun to play with
-languages!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="language">The Basic Language</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This tutorial will be illustrated with a toy language that we'll call
-"<a href="http://en.wikipedia.org/wiki/Kaleidoscope">Kaleidoscope</a>" (derived 
-from "meaning beautiful, form, and view").
-Kaleidoscope is a procedural language that allows you to define functions, use
-conditionals, math, etc.  Over the course of the tutorial, we'll extend
-Kaleidoscope to support the if/then/else construct, a for loop, user defined
-operators, JIT compilation with a simple command line interface, etc.</p>
-
-<p>Because we want to keep things simple, the only datatype in Kaleidoscope is a
-64-bit floating point type (aka 'double' in C parlance).  As such, all values
-are implicitly double precision and the language doesn't require type
-declarations.  This gives the language a very nice and simple syntax.  For
-example, the following simple example computes <a 
-href="http://en.wikipedia.org/wiki/Fibonacci_number">Fibonacci numbers:</a></p>
-
-<div class="doc_code">
-<pre>
-# Compute the x'th fibonacci number.
-def fib(x)
-  if x &lt; 3 then
-    1
-  else
-    fib(x-1)+fib(x-2)
-
-# This expression will compute the 40th number.
-fib(40)
-</pre>
-</div>
-
-<p>We also allow Kaleidoscope to call into standard library functions (the LLVM
-JIT makes this completely trivial).  This means that you can use the 'extern'
-keyword to define a function before you use it (this is also useful for mutually
-recursive functions).  For example:</p>
-
-<div class="doc_code">
-<pre>
-extern sin(arg);
-extern cos(arg);
-extern atan2(arg1 arg2);
-
-atan2(sin(.4), cos(42))
-</pre>
-</div>
-
-<p>A more interesting example is included in Chapter 6 where we write a little
-Kaleidoscope application that <a href="LangImpl6.html#example">displays 
-a Mandelbrot Set</a> at various levels of magnification.</p>
-
-<p>Lets dive into the implementation of this language!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="lexer">The Lexer</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>When it comes to implementing a language, the first thing needed is
-the ability to process a text file and recognize what it says.  The traditional
-way to do this is to use a "<a 
-href="http://en.wikipedia.org/wiki/Lexical_analysis">lexer</a>" (aka 'scanner')
-to break the input up into "tokens".  Each token returned by the lexer includes
-a token code and potentially some metadata (e.g. the numeric value of a number).
-First, we define the possibilities:
-</p>
-
-<div class="doc_code">
-<pre>
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5,
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-</pre>
-</div>
-
-<p>Each token returned by our lexer will either be one of the Token enum values
-or it will be an 'unknown' character like '+', which is returned as its ASCII
-value.  If the current token is an identifier, the <tt>IdentifierStr</tt>
-global variable holds the name of the identifier.  If the current token is a
-numeric literal (like 1.0), <tt>NumVal</tt> holds its value.  Note that we use
-global variables for simplicity, this is not the best choice for a real language
-implementation :).
-</p>
-
-<p>The actual implementation of the lexer is a single function named
-<tt>gettok</tt>. The <tt>gettok</tt> function is called to return the next token
-from standard input.  Its definition starts as:</p>
-
-<div class="doc_code">
-<pre>
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-</pre>
-</div>
-
-<p>
-<tt>gettok</tt> works by calling the C <tt>getchar()</tt> function to read
-characters one at a time from standard input.  It eats them as it recognizes
-them and stores the last character read, but not processed, in LastChar.  The
-first thing that it has to do is ignore whitespace between tokens.  This is 
-accomplished with the loop above.</p>
-
-<p>The next thing <tt>gettok</tt> needs to do is recognize identifiers and
-specific keywords like "def".  Kaleidoscope does this with this simple loop:</p>
-
-<div class="doc_code">
-<pre>
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    return tok_identifier;
-  }
-</pre>
-</div>
-
-<p>Note that this code sets the '<tt>IdentifierStr</tt>' global whenever it
-lexes an identifier.  Also, since language keywords are matched by the same
-loop, we handle them here inline.  Numeric values are similar:</p>
-
-<div class="doc_code">
-<pre>
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-</pre>
-</div>
-
-<p>This is all pretty straight-forward code for processing input.  When reading
-a numeric value from input, we use the C <tt>strtod</tt> function to convert it
-to a numeric value that we store in <tt>NumVal</tt>.  Note that this isn't doing
-sufficient error checking: it will incorrectly read "1.23.45.67" and handle it as
-if you typed in "1.23".  Feel free to extend it :).  Next we handle comments:
-</p>
-
-<div class="doc_code">
-<pre>
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-</pre>
-</div>
-
-<p>We handle comments by skipping to the end of the line and then return the
-next token.  Finally, if the input doesn't match one of the above cases, it is
-either an operator character like '+' or the end of the file.  These are handled
-with this code:</p>
-
-<div class="doc_code">
-<pre>
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-  
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-</pre>
-</div>
-
-<p>With this, we have the complete lexer for the basic Kaleidoscope language
-(the <a href="LangImpl2.html#code">full code listing</a> for the Lexer is
-available in the <a href="LangImpl2.html">next chapter</a> of the tutorial).
-Next we'll <a href="LangImpl2.html">build a simple parser that uses this to 
-build an Abstract Syntax Tree</a>.  When we have that, we'll include a driver
-so that you can use the lexer and parser together.
-</p>
-
-<a href="LangImpl2.html">Next: Implementing a Parser and AST</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl1.rst b/docs/tutorial/LangImpl1.rst
new file mode 100644
index 0000000000..eb84e4c923
--- /dev/null
+++ b/docs/tutorial/LangImpl1.rst
@@ -0,0 +1,280 @@
+=================================================
+Kaleidoscope: Tutorial Introduction and the Lexer
+=================================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_
+
+Tutorial Introduction
+=====================
+
+Welcome to the "Implementing a language with LLVM" tutorial. This
+tutorial runs through the implementation of a simple language, showing
+how fun and easy it can be. This tutorial will get you up and started as
+well as help to build a framework you can extend to other languages. The
+code in this tutorial can also be used as a playground to hack on other
+LLVM specific things.
+
+The goal of this tutorial is to progressively unveil our language,
+describing how it is built up over time. This will let us cover a fairly
+broad range of language design and LLVM-specific usage issues, showing
+and explaining the code for it all along the way, without overwhelming
+you with tons of details up front.
+
+It is useful to point out ahead of time that this tutorial is really
+about teaching compiler techniques and LLVM specifically, *not* about
+teaching modern and sane software engineering principles. In practice,
+this means that we'll take a number of shortcuts to simplify the
+exposition. For example, the code leaks memory, uses global variables
+all over the place, doesn't use nice design patterns like
+`visitors <http://en.wikipedia.org/wiki/Visitor_pattern>`_, etc... but
+it is very simple. If you dig in and use the code as a basis for future
+projects, fixing these deficiencies shouldn't be hard.
+
+I've tried to put this tutorial together in a way that makes chapters
+easy to skip over if you are already familiar with or are uninterested
+in the various pieces. The structure of the tutorial is:
+
+-  `Chapter #1 <#language>`_: Introduction to the Kaleidoscope
+   language, and the definition of its Lexer - This shows where we are
+   going and the basic functionality that we want it to do. In order to
+   make this tutorial maximally understandable and hackable, we choose
+   to implement everything in C++ instead of using lexer and parser
+   generators. LLVM obviously works just fine with such tools, feel free
+   to use one if you prefer.
+-  `Chapter #2 <LangImpl2.html>`_: Implementing a Parser and AST -
+   With the lexer in place, we can talk about parsing techniques and
+   basic AST construction. This tutorial describes recursive descent
+   parsing and operator precedence parsing. Nothing in Chapters 1 or 2
+   is LLVM-specific, the code doesn't even link in LLVM at this point.
+   :)
+-  `Chapter #3 <LangImpl3.html>`_: Code generation to LLVM IR - With
+   the AST ready, we can show off how easy generation of LLVM IR really
+   is.
+-  `Chapter #4 <LangImpl4.html>`_: Adding JIT and Optimizer Support
+   - Because a lot of people are interested in using LLVM as a JIT,
+   we'll dive right into it and show you the 3 lines it takes to add JIT
+   support. LLVM is also useful in many other ways, but this is one
+   simple and "sexy" way to shows off its power. :)
+-  `Chapter #5 <LangImpl5.html>`_: Extending the Language: Control
+   Flow - With the language up and running, we show how to extend it
+   with control flow operations (if/then/else and a 'for' loop). This
+   gives us a chance to talk about simple SSA construction and control
+   flow.
+-  `Chapter #6 <LangImpl6.html>`_: Extending the Language:
+   User-defined Operators - This is a silly but fun chapter that talks
+   about extending the language to let the user program define their own
+   arbitrary unary and binary operators (with assignable precedence!).
+   This lets us build a significant piece of the "language" as library
+   routines.
+-  `Chapter #7 <LangImpl7.html>`_: Extending the Language: Mutable
+   Variables - This chapter talks about adding user-defined local
+   variables along with an assignment operator. The interesting part
+   about this is how easy and trivial it is to construct SSA form in
+   LLVM: no, LLVM does *not* require your front-end to construct SSA
+   form!
+-  `Chapter #8 <LangImpl8.html>`_: Conclusion and other useful LLVM
+   tidbits - This chapter wraps up the series by talking about
+   potential ways to extend the language, but also includes a bunch of
+   pointers to info about "special topics" like adding garbage
+   collection support, exceptions, debugging, support for "spaghetti
+   stacks", and a bunch of other tips and tricks.
+
+By the end of the tutorial, we'll have written a bit less than 700 lines
+of non-comment, non-blank, lines of code. With this small amount of
+code, we'll have built up a very reasonable compiler for a non-trivial
+language including a hand-written lexer, parser, AST, as well as code
+generation support with a JIT compiler. While other systems may have
+interesting "hello world" tutorials, I think the breadth of this
+tutorial is a great testament to the strengths of LLVM and why you
+should consider it if you're interested in language or compiler design.
+
+A note about this tutorial: we expect you to extend the language and
+play with it on your own. Take the code and go crazy hacking away at it,
+compilers don't need to be scary creatures - it can be a lot of fun to
+play with languages!
+
+The Basic Language
+==================
+
+This tutorial will be illustrated with a toy language that we'll call
+"`Kaleidoscope <http://en.wikipedia.org/wiki/Kaleidoscope>`_" (derived
+from "meaning beautiful, form, and view"). Kaleidoscope is a procedural
+language that allows you to define functions, use conditionals, math,
+etc. Over the course of the tutorial, we'll extend Kaleidoscope to
+support the if/then/else construct, a for loop, user defined operators,
+JIT compilation with a simple command line interface, etc.
+
+Because we want to keep things simple, the only datatype in Kaleidoscope
+is a 64-bit floating point type (aka 'double' in C parlance). As such,
+all values are implicitly double precision and the language doesn't
+require type declarations. This gives the language a very nice and
+simple syntax. For example, the following simple example computes
+`Fibonacci numbers: <http://en.wikipedia.org/wiki/Fibonacci_number>`_
+
+::
+
+    # Compute the x'th fibonacci number.
+    def fib(x)
+      if x < 3 then
+        1
+      else
+        fib(x-1)+fib(x-2)
+
+    # This expression will compute the 40th number.
+    fib(40)
+
+We also allow Kaleidoscope to call into standard library functions (the
+LLVM JIT makes this completely trivial). This means that you can use the
+'extern' keyword to define a function before you use it (this is also
+useful for mutually recursive functions). For example:
+
+::
+
+    extern sin(arg);
+    extern cos(arg);
+    extern atan2(arg1 arg2);
+
+    atan2(sin(.4), cos(42))
+
+A more interesting example is included in Chapter 6 where we write a
+little Kaleidoscope application that `displays a Mandelbrot
+Set <LangImpl6.html#example>`_ at various levels of magnification.
+
+Lets dive into the implementation of this language!
+
+The Lexer
+=========
+
+When it comes to implementing a language, the first thing needed is the
+ability to process a text file and recognize what it says. The
+traditional way to do this is to use a
+"`lexer <http://en.wikipedia.org/wiki/Lexical_analysis>`_" (aka
+'scanner') to break the input up into "tokens". Each token returned by
+the lexer includes a token code and potentially some metadata (e.g. the
+numeric value of a number). First, we define the possibilities:
+
+.. code-block:: c++
+
+    // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+    // of these for known things.
+    enum Token {
+      tok_eof = -1,
+
+      // commands
+      tok_def = -2, tok_extern = -3,
+
+      // primary
+      tok_identifier = -4, tok_number = -5,
+    };
+
+    static std::string IdentifierStr;  // Filled in if tok_identifier
+    static double NumVal;              // Filled in if tok_number
+
+Each token returned by our lexer will either be one of the Token enum
+values or it will be an 'unknown' character like '+', which is returned
+as its ASCII value. If the current token is an identifier, the
+``IdentifierStr`` global variable holds the name of the identifier. If
+the current token is a numeric literal (like 1.0), ``NumVal`` holds its
+value. Note that we use global variables for simplicity, this is not the
+best choice for a real language implementation :).
+
+The actual implementation of the lexer is a single function named
+``gettok``. The ``gettok`` function is called to return the next token
+from standard input. Its definition starts as:
+
+.. code-block:: c++
+
+    /// gettok - Return the next token from standard input.
+    static int gettok() {
+      static int LastChar = ' ';
+
+      // Skip any whitespace.
+      while (isspace(LastChar))
+        LastChar = getchar();
+
+``gettok`` works by calling the C ``getchar()`` function to read
+characters one at a time from standard input. It eats them as it
+recognizes them and stores the last character read, but not processed,
+in LastChar. The first thing that it has to do is ignore whitespace
+between tokens. This is accomplished with the loop above.
+
+The next thing ``gettok`` needs to do is recognize identifiers and
+specific keywords like "def". Kaleidoscope does this with this simple
+loop:
+
+.. code-block:: c++
+
+      if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+        IdentifierStr = LastChar;
+        while (isalnum((LastChar = getchar())))
+          IdentifierStr += LastChar;
+
+        if (IdentifierStr == "def") return tok_def;
+        if (IdentifierStr == "extern") return tok_extern;
+        return tok_identifier;
+      }
+
+Note that this code sets the '``IdentifierStr``' global whenever it
+lexes an identifier. Also, since language keywords are matched by the
+same loop, we handle them here inline. Numeric values are similar:
+
+.. code-block:: c++
+
+      if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+        std::string NumStr;
+        do {
+          NumStr += LastChar;
+          LastChar = getchar();
+        } while (isdigit(LastChar) || LastChar == '.');
+
+        NumVal = strtod(NumStr.c_str(), 0);
+        return tok_number;
+      }
+
+This is all pretty straight-forward code for processing input. When
+reading a numeric value from input, we use the C ``strtod`` function to
+convert it to a numeric value that we store in ``NumVal``. Note that
+this isn't doing sufficient error checking: it will incorrectly read
+"1.23.45.67" and handle it as if you typed in "1.23". Feel free to
+extend it :). Next we handle comments:
+
+.. code-block:: c++
+
+      if (LastChar == '#') {
+        // Comment until end of line.
+        do LastChar = getchar();
+        while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+        if (LastChar != EOF)
+          return gettok();
+      }
+
+We handle comments by skipping to the end of the line and then return
+the next token. Finally, if the input doesn't match one of the above
+cases, it is either an operator character like '+' or the end of the
+file. These are handled with this code:
+
+.. code-block:: c++
+
+      // Check for end of file.  Don't eat the EOF.
+      if (LastChar == EOF)
+        return tok_eof;
+
+      // Otherwise, just return the character as its ascii value.
+      int ThisChar = LastChar;
+      LastChar = getchar();
+      return ThisChar;
+    }
+
+With this, we have the complete lexer for the basic Kaleidoscope
+language (the `full code listing <LangImpl2.html#code>`_ for the Lexer
+is available in the `next chapter <LangImpl2.html>`_ of the tutorial).
+Next we'll `build a simple parser that uses this to build an Abstract
+Syntax Tree <LangImpl2.html>`_. When we have that, we'll include a
+driver so that you can use the lexer and parser together.
+
+`Next: Implementing a Parser and AST <LangImpl2.html>`_
+
diff --git a/docs/tutorial/LangImpl2.html b/docs/tutorial/LangImpl2.html
deleted file mode 100644
index 292dd4e516..0000000000
--- a/docs/tutorial/LangImpl2.html
+++ /dev/null
@@ -1,1231 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Implementing a Parser and AST</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Implementing a Parser and AST</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 2
-  <ol>
-    <li><a href="#intro">Chapter 2 Introduction</a></li>
-    <li><a href="#ast">The Abstract Syntax Tree (AST)</a></li>
-    <li><a href="#parserbasics">Parser Basics</a></li>
-    <li><a href="#parserprimexprs">Basic Expression Parsing</a></li>
-    <li><a href="#parserbinops">Binary Expression Parsing</a></li>
-    <li><a href="#parsertop">Parsing the Rest</a></li>
-    <li><a href="#driver">The Driver</a></li>
-    <li><a href="#conclusions">Conclusions</a></li>
-    <li><a href="#code">Full Code Listing</a></li>
-  </ol>
-</li>
-<li><a href="LangImpl3.html">Chapter 3</a>: Code generation to LLVM IR</li>
-</ul>
-
-<div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 2 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 2 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial.  This chapter shows you how to use the lexer, built in 
-<a href="LangImpl1.html">Chapter 1</a>, to build a full <a
-href="http://en.wikipedia.org/wiki/Parsing">parser</a> for
-our Kaleidoscope language.  Once we have a parser, we'll define and build an <a 
-href="http://en.wikipedia.org/wiki/Abstract_syntax_tree">Abstract Syntax 
-Tree</a> (AST).</p>
-
-<p>The parser we will build uses a combination of <a 
-href="http://en.wikipedia.org/wiki/Recursive_descent_parser">Recursive Descent
-Parsing</a> and <a href=
-"http://en.wikipedia.org/wiki/Operator-precedence_parser">Operator-Precedence 
-Parsing</a> to parse the Kaleidoscope language (the latter for 
-binary expressions and the former for everything else).  Before we get to
-parsing though, lets talk about the output of the parser: the Abstract Syntax
-Tree.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="ast">The Abstract Syntax Tree (AST)</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The AST for a program captures its behavior in such a way that it is easy for
-later stages of the compiler (e.g. code generation) to interpret.  We basically
-want one object for each construct in the language, and the AST should closely
-model the language.  In Kaleidoscope, we have expressions, a prototype, and a
-function object.  We'll start with expressions first:</p>
-
-<div class="doc_code">
-<pre>
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-};
-</pre>
-</div>
-
-<p>The code above shows the definition of the base ExprAST class and one
-subclass which we use for numeric literals.  The important thing to note about
-this code is that the NumberExprAST class captures the numeric value of the
-literal as an instance variable. This allows later phases of the compiler to
-know what the stored numeric value is.</p>
-
-<p>Right now we only create the AST,  so there are no useful accessor methods on
-them.  It would be very easy to add a virtual method to pretty print the code,
-for example.  Here are the other expression AST node definitions that we'll use
-in the basic form of the Kaleidoscope language:
-</p>
-
-<div class="doc_code">
-<pre>
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &amp;name) : Name(name) {}
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector&lt;ExprAST*&gt; Args;
-public:
-  CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
-    : Callee(callee), Args(args) {}
-};
-</pre>
-</div>
-
-<p>This is all (intentionally) rather straight-forward: variables capture the
-variable name, binary operators capture their opcode (e.g. '+'), and calls
-capture a function name as well as a list of any argument expressions.  One thing 
-that is nice about our AST is that it captures the language features without 
-talking about the syntax of the language.  Note that there is no discussion about 
-precedence of binary operators, lexical structure, etc.</p>
-
-<p>For our basic language, these are all of the expression nodes we'll define.
-Because it doesn't have conditional control flow, it isn't Turing-complete;
-we'll fix that in a later installment.  The two things we need next are a way
-to talk about the interface to a function, and a way to talk about functions
-themselves:</p>
-
-<div class="doc_code">
-<pre>
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
-  std::string Name;
-  std::vector&lt;std::string&gt; Args;
-public:
-  PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args)
-    : Name(name), Args(args) {}
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-};
-</pre>
-</div>
-
-<p>In Kaleidoscope, functions are typed with just a count of their arguments.
-Since all values are double precision floating point, the type of each argument
-doesn't need to be stored anywhere.  In a more aggressive and realistic
-language, the "ExprAST" class would probably have a type field.</p>
-
-<p>With this scaffolding, we can now talk about parsing expressions and function
-bodies in Kaleidoscope.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parserbasics">Parser Basics</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we have an AST to build, we need to define the parser code to build
-it.  The idea here is that we want to parse something like "x+y" (which is
-returned as three tokens by the lexer) into an AST that could be generated with
-calls like this:</p>
-
-<div class="doc_code">
-<pre>
-  ExprAST *X = new VariableExprAST("x");
-  ExprAST *Y = new VariableExprAST("y");
-  ExprAST *Result = new BinaryExprAST('+', X, Y);
-</pre>
-</div>
-
-<p>In order to do this, we'll start by defining some basic helper routines:</p>
-
-<div class="doc_code">
-<pre>
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-</pre>
-</div>
-
-<p>
-This implements a simple token buffer around the lexer.  This allows 
-us to look one token ahead at what the lexer is returning.  Every function in
-our parser will assume that CurTok is the current token that needs to be
-parsed.</p>
-
-<div class="doc_code">
-<pre>
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-</pre>
-</div>
-
-<p>
-The <tt>Error</tt> routines are simple helper routines that our parser will use
-to handle errors.  The error recovery in our parser will not be the best and
-is not particular user-friendly, but it will be enough for our tutorial.  These
-routines make it easier to handle errors in routines that have various return
-types: they always return null.</p>
-
-<p>With these basic helper functions, we can implement the first
-piece of our grammar: numeric literals.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parserprimexprs">Basic Expression Parsing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>We start with numeric literals, because they are the simplest to process.
-For each production in our grammar, we'll define a function which parses that
-production.  For numeric literals, we have:
-</p>
-
-<div class="doc_code">
-<pre>
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-</pre>
-</div>
-
-<p>This routine is very simple: it expects to be called when the current token
-is a <tt>tok_number</tt> token.  It takes the current number value, creates 
-a <tt>NumberExprAST</tt> node, advances the lexer to the next token, and finally
-returns.</p>
-
-<p>There are some interesting aspects to this.  The most important one is that
-this routine eats all of the tokens that correspond to the production and
-returns the lexer buffer with the next token (which is not part of the grammar
-production) ready to go.  This is a fairly standard way to go for recursive
-descent parsers.  For a better example, the parenthesis operator is defined like
-this:</p>
-
-<div class="doc_code">
-<pre>
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-</pre>
-</div>
-
-<p>This function illustrates a number of interesting things about the 
-parser:</p>
-
-<p>
-1) It shows how we use the Error routines.  When called, this function expects
-that the current token is a '(' token, but after parsing the subexpression, it
-is possible that there is no ')' waiting.  For example, if the user types in
-"(4 x" instead of "(4)", the parser should emit an error.  Because errors can
-occur, the parser needs a way to indicate that they happened: in our parser, we
-return null on an error.</p>
-
-<p>2) Another interesting aspect of this function is that it uses recursion by
-calling <tt>ParseExpression</tt> (we will soon see that <tt>ParseExpression</tt> can call
-<tt>ParseParenExpr</tt>).  This is powerful because it allows us to handle 
-recursive grammars, and keeps each production very simple.  Note that
-parentheses do not cause construction of AST nodes themselves.  While we could
-do it this way, the most important role of parentheses are to guide the parser
-and provide grouping.  Once the parser constructs the AST, parentheses are not
-needed.</p>
-
-<p>The next simple production is for handling variable references and function
-calls:</p>
-
-<div class="doc_code">
-<pre>
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector&lt;ExprAST*&gt; Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-</pre>
-</div>
-
-<p>This routine follows the same style as the other routines.  (It expects to be
-called if the current token is a <tt>tok_identifier</tt> token).  It also has
-recursion and error handling.  One interesting aspect of this is that it uses
-<em>look-ahead</em> to determine if the current identifier is a stand alone
-variable reference or if it is a function call expression.  It handles this by
-checking to see if the token after the identifier is a '(' token, constructing
-either a <tt>VariableExprAST</tt> or <tt>CallExprAST</tt> node as appropriate.
-</p>
-
-<p>Now that we have all of our simple expression-parsing logic in place, we can
-define a helper function to wrap it together into one entry point.  We call this
-class of expressions "primary" expressions, for reasons that will become more
-clear <a href="LangImpl6.html#unary">later in the tutorial</a>.  In order to
-parse an arbitrary primary expression, we need to determine what sort of
-expression it is:</p>
-
-<div class="doc_code">
-<pre>
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  }
-}
-</pre>
-</div>
-
-<p>Now that you see the definition of this function, it is more obvious why we
-can assume the state of CurTok in the various functions.  This uses look-ahead
-to determine which sort of expression is being inspected, and then parses it
-with a function call.</p>
-
-<p>Now that basic expressions are handled, we need to handle binary expressions.
-They are a bit more complex.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parserbinops">Binary Expression Parsing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Binary expressions are significantly harder to parse because they are often
-ambiguous.  For example, when given the string "x+y*z", the parser can choose
-to parse it as either "(x+y)*z" or "x+(y*z)".  With common definitions from
-mathematics, we expect the later parse, because "*" (multiplication) has
-higher <em>precedence</em> than "+" (addition).</p>
-
-<p>There are many ways to handle this, but an elegant and efficient way is to
-use <a href=
-"http://en.wikipedia.org/wiki/Operator-precedence_parser">Operator-Precedence 
-Parsing</a>.  This parsing technique uses the precedence of binary operators to
-guide recursion.  To start with, we need a table of precedences:</p>
-
-<div class="doc_code">
-<pre>
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-    
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec &lt;= 0) return -1;
-  return TokPrec;
-}
-
-int main() {
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['&lt;'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-  ...
-}
-</pre>
-</div>
-
-<p>For the basic form of Kaleidoscope, we will only support 4 binary operators
-(this can obviously be extended by you, our brave and intrepid reader).  The
-<tt>GetTokPrecedence</tt> function returns the precedence for the current token,
-or -1 if the token is not a binary operator.  Having a map makes it easy to add
-new operators and makes it clear that the algorithm doesn't depend on the
-specific operators involved, but it would be easy enough to eliminate the map
-and do the comparisons in the <tt>GetTokPrecedence</tt> function.  (Or just use
-a fixed-size array).</p>
-
-<p>With the helper above defined, we can now start parsing binary expressions.
-The basic idea of operator precedence parsing is to break down an expression
-with potentially ambiguous binary operators into pieces.  Consider ,for example,
-the expression "a+b+(c+d)*e*f+g".  Operator precedence parsing considers this
-as a stream of primary expressions separated by binary operators.  As such,
-it will first parse the leading primary expression "a", then it will see the
-pairs [+, b] [+, (c+d)] [*, e] [*, f] and [+, g].  Note that because parentheses
-are primary expressions, the binary expression parser doesn't need to worry
-about nested subexpressions like (c+d) at all. 
-</p>
-
-<p>
-To start, an expression is a primary expression potentially followed by a
-sequence of [binop,primaryexpr] pairs:</p>
-
-<div class="doc_code">
-<pre>
-/// expression
-///   ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParsePrimary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-</pre>
-</div>
-
-<p><tt>ParseBinOpRHS</tt> is the function that parses the sequence of pairs for
-us.  It takes a precedence and a pointer to an expression for the part that has been
-parsed so far.   Note that "x" is a perfectly valid expression: As such, "binoprhs" is
-allowed to be empty, in which case it returns the expression that is passed into
-it. In our example above, the code passes the expression for "a" into
-<tt>ParseBinOpRHS</tt> and the current token is "+".</p>
-
-<p>The precedence value passed into <tt>ParseBinOpRHS</tt> indicates the <em>
-minimal operator precedence</em> that the function is allowed to eat.  For
-example, if the current pair stream is [+, x] and <tt>ParseBinOpRHS</tt> is
-passed in a precedence of 40, it will not consume any tokens (because the
-precedence of '+' is only 20).  With this in mind, <tt>ParseBinOpRHS</tt> starts
-with:</p>
-
-<div class="doc_code">
-<pre>
-/// binoprhs
-///   ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec &lt; ExprPrec)
-      return LHS;
-</pre>
-</div>
-
-<p>This code gets the precedence of the current token and checks to see if if is
-too low.  Because we defined invalid tokens to have a precedence of -1, this 
-check implicitly knows that the pair-stream ends when the token stream runs out
-of binary operators.  If this check succeeds, we know that the token is a binary
-operator and that it will be included in this expression:</p>
-
-<div class="doc_code">
-<pre>
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the primary expression after the binary operator.
-    ExprAST *RHS = ParsePrimary();
-    if (!RHS) return 0;
-</pre>
-</div>
-
-<p>As such, this code eats (and remembers) the binary operator and then parses
-the primary expression that follows.  This builds up the whole pair, the first of
-which is [+, b] for the running example.</p>
-
-<p>Now that we parsed the left-hand side of an expression and one pair of the 
-RHS sequence, we have to decide which way the expression associates.  In
-particular, we could have "(a+b) binop unparsed"  or "a + (b binop unparsed)".
-To determine this, we look ahead at "binop" to determine its precedence and 
-compare it to BinOp's precedence (which is '+' in this case):</p>
-
-<div class="doc_code">
-<pre>
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec &lt; NextPrec) {
-</pre>
-</div>
-
-<p>If the precedence of the binop to the right of "RHS" is lower or equal to the
-precedence of our current operator, then we know that the parentheses associate
-as "(a+b) binop ...".  In our example, the current operator is "+" and the next 
-operator is "+", we know that they have the same precedence.  In this case we'll
-create the AST node for "a+b", and then continue parsing:</p>
-
-<div class="doc_code">
-<pre>
-      ... if body omitted ...
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }  // loop around to the top of the while loop.
-}
-</pre>
-</div>
-
-<p>In our example above, this will turn "a+b+" into "(a+b)" and execute the next
-iteration of the loop, with "+" as the current token.  The code above will eat, 
-remember, and parse "(c+d)" as the primary expression, which makes the
-current pair equal to [+, (c+d)].  It will then evaluate the 'if' conditional above with 
-"*" as the binop to the right of the primary.  In this case, the precedence of "*" is
-higher than the precedence of "+" so the if condition will be entered.</p>
-
-<p>The critical question left here is "how can the if condition parse the right
-hand side in full"?  In particular, to build the AST correctly for our example,
-it needs to get all of "(c+d)*e*f" as the RHS expression variable.  The code to
-do this is surprisingly simple (code from the above two blocks duplicated for
-context):</p>
-
-<div class="doc_code">
-<pre>
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec &lt; NextPrec) {
-      <b>RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;</b>
-    }
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }  // loop around to the top of the while loop.
-}
-</pre>
-</div>
-
-<p>At this point, we know that the binary operator to the RHS of our primary
-has higher precedence than the binop we are currently parsing.  As such, we know
-that any sequence of pairs whose operators are all higher precedence than "+"
-should be parsed together and returned as "RHS".  To do this, we recursively
-invoke the <tt>ParseBinOpRHS</tt> function specifying "TokPrec+1" as the minimum
-precedence required for it to continue.  In our example above, this will cause
-it to return the AST node for "(c+d)*e*f" as RHS, which is then set as the RHS
-of the '+' expression.</p>
-
-<p>Finally, on the next iteration of the while loop, the "+g" piece is parsed
-and added to the AST.  With this little bit of code (14 non-trivial lines), we
-correctly handle fully general binary expression parsing in a very elegant way.
-This was a whirlwind tour of this code, and it is somewhat subtle.  I recommend
-running through it with a few tough examples to see how it works.
-</p>
-
-<p>This wraps up handling of expressions.  At this point, we can point the
-parser at an arbitrary token stream and build an expression from it, stopping
-at the first token that is not part of the expression.  Next up we need to
-handle function definitions, etc.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parsertop">Parsing the Rest</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The next thing missing is handling of function prototypes.  In Kaleidoscope,
-these are used both for 'extern' function declarations as well as function body
-definitions.  The code to do this is straight-forward and not very interesting
-(once you've survived expressions):
-</p>
-
-<div class="doc_code">
-<pre>
-/// prototype
-///   ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
-  if (CurTok != tok_identifier)
-    return ErrorP("Expected function name in prototype");
-
-  std::string FnName = IdentifierStr;
-  getNextToken();
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  // Read the list of argument names.
-  std::vector&lt;std::string&gt; ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  return new PrototypeAST(FnName, ArgNames);
-}
-</pre>
-</div>
-
-<p>Given this, a function definition is very simple, just a prototype plus
-an expression to implement the body:</p>
-
-<div class="doc_code">
-<pre>
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-</pre>
-</div>
-
-<p>In addition, we support 'extern' to declare functions like 'sin' and 'cos' as
-well as to support forward declaration of user functions.  These 'extern's are just
-prototypes with no body:</p>
-
-<div class="doc_code">
-<pre>
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-</pre>
-</div>
-
-<p>Finally, we'll also let the user type in arbitrary top-level expressions and
-evaluate them on the fly.  We will handle this by defining anonymous nullary
-(zero argument) functions for them:</p>
-
-<div class="doc_code">
-<pre>
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-</pre>
-</div>
-
-<p>Now that we have all the pieces, let's build a little driver that will let us
-actually <em>execute</em> this code we've built!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="driver">The Driver</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The driver for this simply invokes all of the parsing pieces with a top-level
-dispatch loop.  There isn't much interesting here, so I'll just include the
-top-level loop.  See <a href="#code">below</a> for full code in the "Top-Level
-Parsing" section.</p>
-
-<div class="doc_code">
-<pre>
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready&gt; ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-</pre>
-</div>
-
-<p>The most interesting part of this is that we ignore top-level semicolons.
-Why is this, you ask?  The basic reason is that if you type "4 + 5" at the
-command line, the parser doesn't know whether that is the end of what you will type
-or not.  For example, on the next line you could type "def foo..." in which case
-4+5 is the end of a top-level expression.  Alternatively you could type "* 6",
-which would continue the expression.  Having top-level semicolons allows you to
-type "4+5;", and the parser will know you are done.</p> 
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="conclusions">Conclusions</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>With just under 400 lines of commented code (240 lines of non-comment, 
-non-blank code), we fully defined our minimal language, including a lexer,
-parser, and AST builder.  With this done, the executable will validate 
-Kaleidoscope code and tell us if it is grammatically invalid.  For
-example, here is a sample interaction:</p>
-
-<div class="doc_code">
-<pre>
-$ <b>./a.out</b>
-ready&gt; <b>def foo(x y) x+foo(y, 4.0);</b>
-Parsed a function definition.
-ready&gt; <b>def foo(x y) x+y y;</b>
-Parsed a function definition.
-Parsed a top-level expr
-ready&gt; <b>def foo(x y) x+y );</b>
-Parsed a function definition.
-Error: unknown token when expecting an expression
-ready&gt; <b>extern sin(a);</b>
-ready&gt; Parsed an extern
-ready&gt; <b>^D</b>
-$ 
-</pre>
-</div>
-
-<p>There is a lot of room for extension here.  You can define new AST nodes,
-extend the language in many ways, etc.  In the <a href="LangImpl3.html">next
-installment</a>, we will describe how to generate LLVM Intermediate
-Representation (IR) from the AST.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for this and the previous chapter.  
-Note that it is fully self-contained: you don't need LLVM or any external
-libraries at all for this.  (Besides the C and C++ standard libraries, of
-course.)  To build this, just compile with:</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-clang++ -g -O3 toy.cpp
-# Run
-./a.out 
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<div class="doc_code">
-<pre>
-#include &lt;cstdio&gt;
-#include &lt;cstdlib&gt;
-#include &lt;string&gt;
-#include &lt;map&gt;
-#include &lt;vector&gt;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    return tok_identifier;
-  }
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-  
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &amp;name) : Name(name) {}
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector&lt;ExprAST*&gt; Args;
-public:
-  CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
-    : Callee(callee), Args(args) {}
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
-  std::string Name;
-  std::vector&lt;std::string&gt; Args;
-public:
-  PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args)
-    : Name(name), Args(args) {}
-  
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-  
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-  
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec &lt;= 0) return -1;
-  return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector&lt;ExprAST*&gt; Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  }
-}
-
-/// binoprhs
-///   ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec &lt; ExprPrec)
-      return LHS;
-    
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the primary expression after the binary operator.
-    ExprAST *RHS = ParsePrimary();
-    if (!RHS) return 0;
-    
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec &lt; NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }
-}
-
-/// expression
-///   ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParsePrimary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-///   ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
-  if (CurTok != tok_identifier)
-    return ErrorP("Expected function name in prototype");
-
-  std::string FnName = IdentifierStr;
-  getNextToken();
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  std::vector&lt;std::string&gt; ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  return new PrototypeAST(FnName, ArgNames);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing
-//===----------------------------------------------------------------------===//
-
-static void HandleDefinition() {
-  if (ParseDefinition()) {
-    fprintf(stderr, "Parsed a function definition.\n");
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleExtern() {
-  if (ParseExtern()) {
-    fprintf(stderr, "Parsed an extern\n");
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (ParseTopLevelExpr()) {
-    fprintf(stderr, "Parsed a top-level expr\n");
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready&gt; ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['&lt;'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-
-  // Prime the first token.
-  fprintf(stderr, "ready&gt; ");
-  getNextToken();
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-
-  return 0;
-}
-</pre>
-</div>
-<a href="LangImpl3.html">Next: Implementing Code Generation to LLVM IR</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl2.rst b/docs/tutorial/LangImpl2.rst
new file mode 100644
index 0000000000..0d62894a24
--- /dev/null
+++ b/docs/tutorial/LangImpl2.rst
@@ -0,0 +1,1098 @@
+===========================================
+Kaleidoscope: Implementing a Parser and AST
+===========================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_
+
+Chapter 2 Introduction
+======================
+
+Welcome to Chapter 2 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. This chapter shows you how to use the
+lexer, built in `Chapter 1 <LangImpl1.html>`_, to build a full
+`parser <http://en.wikipedia.org/wiki/Parsing>`_ for our Kaleidoscope
+language. Once we have a parser, we'll define and build an `Abstract
+Syntax Tree <http://en.wikipedia.org/wiki/Abstract_syntax_tree>`_ (AST).
+
+The parser we will build uses a combination of `Recursive Descent
+Parsing <http://en.wikipedia.org/wiki/Recursive_descent_parser>`_ and
+`Operator-Precedence
+Parsing <http://en.wikipedia.org/wiki/Operator-precedence_parser>`_ to
+parse the Kaleidoscope language (the latter for binary expressions and
+the former for everything else). Before we get to parsing though, lets
+talk about the output of the parser: the Abstract Syntax Tree.
+
+The Abstract Syntax Tree (AST)
+==============================
+
+The AST for a program captures its behavior in such a way that it is
+easy for later stages of the compiler (e.g. code generation) to
+interpret. We basically want one object for each construct in the
+language, and the AST should closely model the language. In
+Kaleidoscope, we have expressions, a prototype, and a function object.
+We'll start with expressions first:
+
+.. code-block:: c++
+
+    /// ExprAST - Base class for all expression nodes.
+    class ExprAST {
+    public:
+      virtual ~ExprAST() {}
+    };
+
+    /// NumberExprAST - Expression class for numeric literals like "1.0".
+    class NumberExprAST : public ExprAST {
+      double Val;
+    public:
+      NumberExprAST(double val) : Val(val) {}
+    };
+
+The code above shows the definition of the base ExprAST class and one
+subclass which we use for numeric literals. The important thing to note
+about this code is that the NumberExprAST class captures the numeric
+value of the literal as an instance variable. This allows later phases
+of the compiler to know what the stored numeric value is.
+
+Right now we only create the AST, so there are no useful accessor
+methods on them. It would be very easy to add a virtual method to pretty
+print the code, for example. Here are the other expression AST node
+definitions that we'll use in the basic form of the Kaleidoscope
+language:
+
+.. code-block:: c++
+
+    /// VariableExprAST - Expression class for referencing a variable, like "a".
+    class VariableExprAST : public ExprAST {
+      std::string Name;
+    public:
+      VariableExprAST(const std::string &name) : Name(name) {}
+    };
+
+    /// BinaryExprAST - Expression class for a binary operator.
+    class BinaryExprAST : public ExprAST {
+      char Op;
+      ExprAST *LHS, *RHS;
+    public:
+      BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+        : Op(op), LHS(lhs), RHS(rhs) {}
+    };
+
+    /// CallExprAST - Expression class for function calls.
+    class CallExprAST : public ExprAST {
+      std::string Callee;
+      std::vector<ExprAST*> Args;
+    public:
+      CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+        : Callee(callee), Args(args) {}
+    };
+
+This is all (intentionally) rather straight-forward: variables capture
+the variable name, binary operators capture their opcode (e.g. '+'), and
+calls capture a function name as well as a list of any argument
+expressions. One thing that is nice about our AST is that it captures
+the language features without talking about the syntax of the language.
+Note that there is no discussion about precedence of binary operators,
+lexical structure, etc.
+
+For our basic language, these are all of the expression nodes we'll
+define. Because it doesn't have conditional control flow, it isn't
+Turing-complete; we'll fix that in a later installment. The two things
+we need next are a way to talk about the interface to a function, and a
+way to talk about functions themselves:
+
+.. code-block:: c++
+
+    /// PrototypeAST - This class represents the "prototype" for a function,
+    /// which captures its name, and its argument names (thus implicitly the number
+    /// of arguments the function takes).
+    class PrototypeAST {
+      std::string Name;
+      std::vector<std::string> Args;
+    public:
+      PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+        : Name(name), Args(args) {}
+    };
+
+    /// FunctionAST - This class represents a function definition itself.
+    class FunctionAST {
+      PrototypeAST *Proto;
+      ExprAST *Body;
+    public:
+      FunctionAST(PrototypeAST *proto, ExprAST *body)
+        : Proto(proto), Body(body) {}
+    };
+
+In Kaleidoscope, functions are typed with just a count of their
+arguments. Since all values are double precision floating point, the
+type of each argument doesn't need to be stored anywhere. In a more
+aggressive and realistic language, the "ExprAST" class would probably
+have a type field.
+
+With this scaffolding, we can now talk about parsing expressions and
+function bodies in Kaleidoscope.
+
+Parser Basics
+=============
+
+Now that we have an AST to build, we need to define the parser code to
+build it. The idea here is that we want to parse something like "x+y"
+(which is returned as three tokens by the lexer) into an AST that could
+be generated with calls like this:
+
+.. code-block:: c++
+
+      ExprAST *X = new VariableExprAST("x");
+      ExprAST *Y = new VariableExprAST("y");
+      ExprAST *Result = new BinaryExprAST('+', X, Y);
+
+In order to do this, we'll start by defining some basic helper routines:
+
+.. code-block:: c++
+
+    /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+    /// token the parser is looking at.  getNextToken reads another token from the
+    /// lexer and updates CurTok with its results.
+    static int CurTok;
+    static int getNextToken() {
+      return CurTok = gettok();
+    }
+
+This implements a simple token buffer around the lexer. This allows us
+to look one token ahead at what the lexer is returning. Every function
+in our parser will assume that CurTok is the current token that needs to
+be parsed.
+
+.. code-block:: c++
+
+
+    /// Error* - These are little helper functions for error handling.
+    ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+    PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+    FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+The ``Error`` routines are simple helper routines that our parser will
+use to handle errors. The error recovery in our parser will not be the
+best and is not particular user-friendly, but it will be enough for our
+tutorial. These routines make it easier to handle errors in routines
+that have various return types: they always return null.
+
+With these basic helper functions, we can implement the first piece of
+our grammar: numeric literals.
+
+Basic Expression Parsing
+========================
+
+We start with numeric literals, because they are the simplest to
+process. For each production in our grammar, we'll define a function
+which parses that production. For numeric literals, we have:
+
+.. code-block:: c++
+
+    /// numberexpr ::= number
+    static ExprAST *ParseNumberExpr() {
+      ExprAST *Result = new NumberExprAST(NumVal);
+      getNextToken(); // consume the number
+      return Result;
+    }
+
+This routine is very simple: it expects to be called when the current
+token is a ``tok_number`` token. It takes the current number value,
+creates a ``NumberExprAST`` node, advances the lexer to the next token,
+and finally returns.
+
+There are some interesting aspects to this. The most important one is
+that this routine eats all of the tokens that correspond to the
+production and returns the lexer buffer with the next token (which is
+not part of the grammar production) ready to go. This is a fairly
+standard way to go for recursive descent parsers. For a better example,
+the parenthesis operator is defined like this:
+
+.. code-block:: c++
+
+    /// parenexpr ::= '(' expression ')'
+    static ExprAST *ParseParenExpr() {
+      getNextToken();  // eat (.
+      ExprAST *V = ParseExpression();
+      if (!V) return 0;
+
+      if (CurTok != ')')
+        return Error("expected ')'");
+      getNextToken();  // eat ).
+      return V;
+    }
+
+This function illustrates a number of interesting things about the
+parser:
+
+1) It shows how we use the Error routines. When called, this function
+expects that the current token is a '(' token, but after parsing the
+subexpression, it is possible that there is no ')' waiting. For example,
+if the user types in "(4 x" instead of "(4)", the parser should emit an
+error. Because errors can occur, the parser needs a way to indicate that
+they happened: in our parser, we return null on an error.
+
+2) Another interesting aspect of this function is that it uses recursion
+by calling ``ParseExpression`` (we will soon see that
+``ParseExpression`` can call ``ParseParenExpr``). This is powerful
+because it allows us to handle recursive grammars, and keeps each
+production very simple. Note that parentheses do not cause construction
+of AST nodes themselves. While we could do it this way, the most
+important role of parentheses are to guide the parser and provide
+grouping. Once the parser constructs the AST, parentheses are not
+needed.
+
+The next simple production is for handling variable references and
+function calls:
+
+.. code-block:: c++
+
+    /// identifierexpr
+    ///   ::= identifier
+    ///   ::= identifier '(' expression* ')'
+    static ExprAST *ParseIdentifierExpr() {
+      std::string IdName = IdentifierStr;
+
+      getNextToken();  // eat identifier.
+
+      if (CurTok != '(') // Simple variable ref.
+        return new VariableExprAST(IdName);
+
+      // Call.
+      getNextToken();  // eat (
+      std::vector<ExprAST*> Args;
+      if (CurTok != ')') {
+        while (1) {
+          ExprAST *Arg = ParseExpression();
+          if (!Arg) return 0;
+          Args.push_back(Arg);
+
+          if (CurTok == ')') break;
+
+          if (CurTok != ',')
+            return Error("Expected ')' or ',' in argument list");
+          getNextToken();
+        }
+      }
+
+      // Eat the ')'.
+      getNextToken();
+
+      return new CallExprAST(IdName, Args);
+    }
+
+This routine follows the same style as the other routines. (It expects
+to be called if the current token is a ``tok_identifier`` token). It
+also has recursion and error handling. One interesting aspect of this is
+that it uses *look-ahead* to determine if the current identifier is a
+stand alone variable reference or if it is a function call expression.
+It handles this by checking to see if the token after the identifier is
+a '(' token, constructing either a ``VariableExprAST`` or
+``CallExprAST`` node as appropriate.
+
+Now that we have all of our simple expression-parsing logic in place, we
+can define a helper function to wrap it together into one entry point.
+We call this class of expressions "primary" expressions, for reasons
+that will become more clear `later in the
+tutorial <LangImpl6.html#unary>`_. In order to parse an arbitrary
+primary expression, we need to determine what sort of expression it is:
+
+.. code-block:: c++
+
+    /// primary
+    ///   ::= identifierexpr
+    ///   ::= numberexpr
+    ///   ::= parenexpr
+    static ExprAST *ParsePrimary() {
+      switch (CurTok) {
+      default: return Error("unknown token when expecting an expression");
+      case tok_identifier: return ParseIdentifierExpr();
+      case tok_number:     return ParseNumberExpr();
+      case '(':            return ParseParenExpr();
+      }
+    }
+
+Now that you see the definition of this function, it is more obvious why
+we can assume the state of CurTok in the various functions. This uses
+look-ahead to determine which sort of expression is being inspected, and
+then parses it with a function call.
+
+Now that basic expressions are handled, we need to handle binary
+expressions. They are a bit more complex.
+
+Binary Expression Parsing
+=========================
+
+Binary expressions are significantly harder to parse because they are
+often ambiguous. For example, when given the string "x+y\*z", the parser
+can choose to parse it as either "(x+y)\*z" or "x+(y\*z)". With common
+definitions from mathematics, we expect the later parse, because "\*"
+(multiplication) has higher *precedence* than "+" (addition).
+
+There are many ways to handle this, but an elegant and efficient way is
+to use `Operator-Precedence
+Parsing <http://en.wikipedia.org/wiki/Operator-precedence_parser>`_.
+This parsing technique uses the precedence of binary operators to guide
+recursion. To start with, we need a table of precedences:
+
+.. code-block:: c++
+
+    /// BinopPrecedence - This holds the precedence for each binary operator that is
+    /// defined.
+    static std::map<char, int> BinopPrecedence;
+
+    /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+    static int GetTokPrecedence() {
+      if (!isascii(CurTok))
+        return -1;
+
+      // Make sure it's a declared binop.
+      int TokPrec = BinopPrecedence[CurTok];
+      if (TokPrec <= 0) return -1;
+      return TokPrec;
+    }
+
+    int main() {
+      // Install standard binary operators.
+      // 1 is lowest precedence.
+      BinopPrecedence['<'] = 10;
+      BinopPrecedence['+'] = 20;
+      BinopPrecedence['-'] = 20;
+      BinopPrecedence['*'] = 40;  // highest.
+      ...
+    }
+
+For the basic form of Kaleidoscope, we will only support 4 binary
+operators (this can obviously be extended by you, our brave and intrepid
+reader). The ``GetTokPrecedence`` function returns the precedence for
+the current token, or -1 if the token is not a binary operator. Having a
+map makes it easy to add new operators and makes it clear that the
+algorithm doesn't depend on the specific operators involved, but it
+would be easy enough to eliminate the map and do the comparisons in the
+``GetTokPrecedence`` function. (Or just use a fixed-size array).
+
+With the helper above defined, we can now start parsing binary
+expressions. The basic idea of operator precedence parsing is to break
+down an expression with potentially ambiguous binary operators into
+pieces. Consider ,for example, the expression "a+b+(c+d)\*e\*f+g".
+Operator precedence parsing considers this as a stream of primary
+expressions separated by binary operators. As such, it will first parse
+the leading primary expression "a", then it will see the pairs [+, b]
+[+, (c+d)] [\*, e] [\*, f] and [+, g]. Note that because parentheses are
+primary expressions, the binary expression parser doesn't need to worry
+about nested subexpressions like (c+d) at all.
+
+To start, an expression is a primary expression potentially followed by
+a sequence of [binop,primaryexpr] pairs:
+
+.. code-block:: c++
+
+    /// expression
+    ///   ::= primary binoprhs
+    ///
+    static ExprAST *ParseExpression() {
+      ExprAST *LHS = ParsePrimary();
+      if (!LHS) return 0;
+
+      return ParseBinOpRHS(0, LHS);
+    }
+
+``ParseBinOpRHS`` is the function that parses the sequence of pairs for
+us. It takes a precedence and a pointer to an expression for the part
+that has been parsed so far. Note that "x" is a perfectly valid
+expression: As such, "binoprhs" is allowed to be empty, in which case it
+returns the expression that is passed into it. In our example above, the
+code passes the expression for "a" into ``ParseBinOpRHS`` and the
+current token is "+".
+
+The precedence value passed into ``ParseBinOpRHS`` indicates the
+*minimal operator precedence* that the function is allowed to eat. For
+example, if the current pair stream is [+, x] and ``ParseBinOpRHS`` is
+passed in a precedence of 40, it will not consume any tokens (because
+the precedence of '+' is only 20). With this in mind, ``ParseBinOpRHS``
+starts with:
+
+.. code-block:: c++
+
+    /// binoprhs
+    ///   ::= ('+' primary)*
+    static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+      // If this is a binop, find its precedence.
+      while (1) {
+        int TokPrec = GetTokPrecedence();
+
+        // If this is a binop that binds at least as tightly as the current binop,
+        // consume it, otherwise we are done.
+        if (TokPrec < ExprPrec)
+          return LHS;
+
+This code gets the precedence of the current token and checks to see if
+if is too low. Because we defined invalid tokens to have a precedence of
+-1, this check implicitly knows that the pair-stream ends when the token
+stream runs out of binary operators. If this check succeeds, we know
+that the token is a binary operator and that it will be included in this
+expression:
+
+.. code-block:: c++
+
+        // Okay, we know this is a binop.
+        int BinOp = CurTok;
+        getNextToken();  // eat binop
+
+        // Parse the primary expression after the binary operator.
+        ExprAST *RHS = ParsePrimary();
+        if (!RHS) return 0;
+
+As such, this code eats (and remembers) the binary operator and then
+parses the primary expression that follows. This builds up the whole
+pair, the first of which is [+, b] for the running example.
+
+Now that we parsed the left-hand side of an expression and one pair of
+the RHS sequence, we have to decide which way the expression associates.
+In particular, we could have "(a+b) binop unparsed" or "a + (b binop
+unparsed)". To determine this, we look ahead at "binop" to determine its
+precedence and compare it to BinOp's precedence (which is '+' in this
+case):
+
+.. code-block:: c++
+
+        // If BinOp binds less tightly with RHS than the operator after RHS, let
+        // the pending operator take RHS as its LHS.
+        int NextPrec = GetTokPrecedence();
+        if (TokPrec < NextPrec) {
+
+If the precedence of the binop to the right of "RHS" is lower or equal
+to the precedence of our current operator, then we know that the
+parentheses associate as "(a+b) binop ...". In our example, the current
+operator is "+" and the next operator is "+", we know that they have the
+same precedence. In this case we'll create the AST node for "a+b", and
+then continue parsing:
+
+.. code-block:: c++
+
+          ... if body omitted ...
+        }
+
+        // Merge LHS/RHS.
+        LHS = new BinaryExprAST(BinOp, LHS, RHS);
+      }  // loop around to the top of the while loop.
+    }
+
+In our example above, this will turn "a+b+" into "(a+b)" and execute the
+next iteration of the loop, with "+" as the current token. The code
+above will eat, remember, and parse "(c+d)" as the primary expression,
+which makes the current pair equal to [+, (c+d)]. It will then evaluate
+the 'if' conditional above with "\*" as the binop to the right of the
+primary. In this case, the precedence of "\*" is higher than the
+precedence of "+" so the if condition will be entered.
+
+The critical question left here is "how can the if condition parse the
+right hand side in full"? In particular, to build the AST correctly for
+our example, it needs to get all of "(c+d)\*e\*f" as the RHS expression
+variable. The code to do this is surprisingly simple (code from the
+above two blocks duplicated for context):
+
+.. code-block:: c++
+
+        // If BinOp binds less tightly with RHS than the operator after RHS, let
+        // the pending operator take RHS as its LHS.
+        int NextPrec = GetTokPrecedence();
+        if (TokPrec < NextPrec) {
+          RHS = ParseBinOpRHS(TokPrec+1, RHS);
+          if (RHS == 0) return 0;
+        }
+        // Merge LHS/RHS.
+        LHS = new BinaryExprAST(BinOp, LHS, RHS);
+      }  // loop around to the top of the while loop.
+    }
+
+At this point, we know that the binary operator to the RHS of our
+primary has higher precedence than the binop we are currently parsing.
+As such, we know that any sequence of pairs whose operators are all
+higher precedence than "+" should be parsed together and returned as
+"RHS". To do this, we recursively invoke the ``ParseBinOpRHS`` function
+specifying "TokPrec+1" as the minimum precedence required for it to
+continue. In our example above, this will cause it to return the AST
+node for "(c+d)\*e\*f" as RHS, which is then set as the RHS of the '+'
+expression.
+
+Finally, on the next iteration of the while loop, the "+g" piece is
+parsed and added to the AST. With this little bit of code (14
+non-trivial lines), we correctly handle fully general binary expression
+parsing in a very elegant way. This was a whirlwind tour of this code,
+and it is somewhat subtle. I recommend running through it with a few
+tough examples to see how it works.
+
+This wraps up handling of expressions. At this point, we can point the
+parser at an arbitrary token stream and build an expression from it,
+stopping at the first token that is not part of the expression. Next up
+we need to handle function definitions, etc.
+
+Parsing the Rest
+================
+
+The next thing missing is handling of function prototypes. In
+Kaleidoscope, these are used both for 'extern' function declarations as
+well as function body definitions. The code to do this is
+straight-forward and not very interesting (once you've survived
+expressions):
+
+.. code-block:: c++
+
+    /// prototype
+    ///   ::= id '(' id* ')'
+    static PrototypeAST *ParsePrototype() {
+      if (CurTok != tok_identifier)
+        return ErrorP("Expected function name in prototype");
+
+      std::string FnName = IdentifierStr;
+      getNextToken();
+
+      if (CurTok != '(')
+        return ErrorP("Expected '(' in prototype");
+
+      // Read the list of argument names.
+      std::vector<std::string> ArgNames;
+      while (getNextToken() == tok_identifier)
+        ArgNames.push_back(IdentifierStr);
+      if (CurTok != ')')
+        return ErrorP("Expected ')' in prototype");
+
+      // success.
+      getNextToken();  // eat ')'.
+
+      return new PrototypeAST(FnName, ArgNames);
+    }
+
+Given this, a function definition is very simple, just a prototype plus
+an expression to implement the body:
+
+.. code-block:: c++
+
+    /// definition ::= 'def' prototype expression
+    static FunctionAST *ParseDefinition() {
+      getNextToken();  // eat def.
+      PrototypeAST *Proto = ParsePrototype();
+      if (Proto == 0) return 0;
+
+      if (ExprAST *E = ParseExpression())
+        return new FunctionAST(Proto, E);
+      return 0;
+    }
+
+In addition, we support 'extern' to declare functions like 'sin' and
+'cos' as well as to support forward declaration of user functions. These
+'extern's are just prototypes with no body:
+
+.. code-block:: c++
+
+    /// external ::= 'extern' prototype
+    static PrototypeAST *ParseExtern() {
+      getNextToken();  // eat extern.
+      return ParsePrototype();
+    }
+
+Finally, we'll also let the user type in arbitrary top-level expressions
+and evaluate them on the fly. We will handle this by defining anonymous
+nullary (zero argument) functions for them:
+
+.. code-block:: c++
+
+    /// toplevelexpr ::= expression
+    static FunctionAST *ParseTopLevelExpr() {
+      if (ExprAST *E = ParseExpression()) {
+        // Make an anonymous proto.
+        PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+        return new FunctionAST(Proto, E);
+      }
+      return 0;
+    }
+
+Now that we have all the pieces, let's build a little driver that will
+let us actually *execute* this code we've built!
+
+The Driver
+==========
+
+The driver for this simply invokes all of the parsing pieces with a
+top-level dispatch loop. There isn't much interesting here, so I'll just
+include the top-level loop. See `below <#code>`_ for full code in the
+"Top-Level Parsing" section.
+
+.. code-block:: c++
+
+    /// top ::= definition | external | expression | ';'
+    static void MainLoop() {
+      while (1) {
+        fprintf(stderr, "ready> ");
+        switch (CurTok) {
+        case tok_eof:    return;
+        case ';':        getNextToken(); break;  // ignore top-level semicolons.
+        case tok_def:    HandleDefinition(); break;
+        case tok_extern: HandleExtern(); break;
+        default:         HandleTopLevelExpression(); break;
+        }
+      }
+    }
+
+The most interesting part of this is that we ignore top-level
+semicolons. Why is this, you ask? The basic reason is that if you type
+"4 + 5" at the command line, the parser doesn't know whether that is the
+end of what you will type or not. For example, on the next line you
+could type "def foo..." in which case 4+5 is the end of a top-level
+expression. Alternatively you could type "\* 6", which would continue
+the expression. Having top-level semicolons allows you to type "4+5;",
+and the parser will know you are done.
+
+Conclusions
+===========
+
+With just under 400 lines of commented code (240 lines of non-comment,
+non-blank code), we fully defined our minimal language, including a
+lexer, parser, and AST builder. With this done, the executable will
+validate Kaleidoscope code and tell us if it is grammatically invalid.
+For example, here is a sample interaction:
+
+.. code-block:: bash
+
+    $ ./a.out
+    ready> def foo(x y) x+foo(y, 4.0);
+    Parsed a function definition.
+    ready> def foo(x y) x+y y;
+    Parsed a function definition.
+    Parsed a top-level expr
+    ready> def foo(x y) x+y );
+    Parsed a function definition.
+    Error: unknown token when expecting an expression
+    ready> extern sin(a);
+    ready> Parsed an extern
+    ready> ^D
+    $
+
+There is a lot of room for extension here. You can define new AST nodes,
+extend the language in many ways, etc. In the `next
+installment <LangImpl3.html>`_, we will describe how to generate LLVM
+Intermediate Representation (IR) from the AST.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for this and the previous chapter.
+Note that it is fully self-contained: you don't need LLVM or any
+external libraries at all for this. (Besides the C and C++ standard
+libraries, of course.) To build this, just compile with:
+
+.. code-block:: bash
+
+    # Compile
+    clang++ -g -O3 toy.cpp
+    # Run
+    ./a.out
+
+Here is the code:
+
+.. code-block:: c++
+
+    #include <cstdio>
+    #include <cstdlib>
+    #include <string>
+    #include <map>
+    #include <vector>
+
+    //===----------------------------------------------------------------------===//
+    // Lexer
+    //===----------------------------------------------------------------------===//
+
+    // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+    // of these for known things.
+    enum Token {
+      tok_eof = -1,
+
+      // commands
+      tok_def = -2, tok_extern = -3,
+
+      // primary
+      tok_identifier = -4, tok_number = -5
+    };
+
+    static std::string IdentifierStr;  // Filled in if tok_identifier
+    static double NumVal;              // Filled in if tok_number
+
+    /// gettok - Return the next token from standard input.
+    static int gettok() {
+      static int LastChar = ' ';
+
+      // Skip any whitespace.
+      while (isspace(LastChar))
+        LastChar = getchar();
+
+      if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+        IdentifierStr = LastChar;
+        while (isalnum((LastChar = getchar())))
+          IdentifierStr += LastChar;
+
+        if (IdentifierStr == "def") return tok_def;
+        if (IdentifierStr == "extern") return tok_extern;
+        return tok_identifier;
+      }
+
+      if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+        std::string NumStr;
+        do {
+          NumStr += LastChar;
+          LastChar = getchar();
+        } while (isdigit(LastChar) || LastChar == '.');
+
+        NumVal = strtod(NumStr.c_str(), 0);
+        return tok_number;
+      }
+
+      if (LastChar == '#') {
+        // Comment until end of line.
+        do LastChar = getchar();
+        while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+        if (LastChar != EOF)
+          return gettok();
+      }
+
+      // Check for end of file.  Don't eat the EOF.
+      if (LastChar == EOF)
+        return tok_eof;
+
+      // Otherwise, just return the character as its ascii value.
+      int ThisChar = LastChar;
+      LastChar = getchar();
+      return ThisChar;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Abstract Syntax Tree (aka Parse Tree)
+    //===----------------------------------------------------------------------===//
+
+    /// ExprAST - Base class for all expression nodes.
+    class ExprAST {
+    public:
+      virtual ~ExprAST() {}
+    };
+
+    /// NumberExprAST - Expression class for numeric literals like "1.0".
+    class NumberExprAST : public ExprAST {
+      double Val;
+    public:
+      NumberExprAST(double val) : Val(val) {}
+    };
+
+    /// VariableExprAST - Expression class for referencing a variable, like "a".
+    class VariableExprAST : public ExprAST {
+      std::string Name;
+    public:
+      VariableExprAST(const std::string &name) : Name(name) {}
+    };
+
+    /// BinaryExprAST - Expression class for a binary operator.
+    class BinaryExprAST : public ExprAST {
+      char Op;
+      ExprAST *LHS, *RHS;
+    public:
+      BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+        : Op(op), LHS(lhs), RHS(rhs) {}
+    };
+
+    /// CallExprAST - Expression class for function calls.
+    class CallExprAST : public ExprAST {
+      std::string Callee;
+      std::vector<ExprAST*> Args;
+    public:
+      CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+        : Callee(callee), Args(args) {}
+    };
+
+    /// PrototypeAST - This class represents the "prototype" for a function,
+    /// which captures its name, and its argument names (thus implicitly the number
+    /// of arguments the function takes).
+    class PrototypeAST {
+      std::string Name;
+      std::vector<std::string> Args;
+    public:
+      PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+        : Name(name), Args(args) {}
+
+    };
+
+    /// FunctionAST - This class represents a function definition itself.
+    class FunctionAST {
+      PrototypeAST *Proto;
+      ExprAST *Body;
+    public:
+      FunctionAST(PrototypeAST *proto, ExprAST *body)
+        : Proto(proto), Body(body) {}
+
+    };
+
+    //===----------------------------------------------------------------------===//
+    // Parser
+    //===----------------------------------------------------------------------===//
+
+    /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+    /// token the parser is looking at.  getNextToken reads another token from the
+    /// lexer and updates CurTok with its results.
+    static int CurTok;
+    static int getNextToken() {
+      return CurTok = gettok();
+    }
+
+    /// BinopPrecedence - This holds the precedence for each binary operator that is
+    /// defined.
+    static std::map<char, int> BinopPrecedence;
+
+    /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+    static int GetTokPrecedence() {
+      if (!isascii(CurTok))
+        return -1;
+
+      // Make sure it's a declared binop.
+      int TokPrec = BinopPrecedence[CurTok];
+      if (TokPrec <= 0) return -1;
+      return TokPrec;
+    }
+
+    /// Error* - These are little helper functions for error handling.
+    ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+    PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+    FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+    static ExprAST *ParseExpression();
+
+    /// identifierexpr
+    ///   ::= identifier
+    ///   ::= identifier '(' expression* ')'
+    static ExprAST *ParseIdentifierExpr() {
+      std::string IdName = IdentifierStr;
+
+      getNextToken();  // eat identifier.
+
+      if (CurTok != '(') // Simple variable ref.
+        return new VariableExprAST(IdName);
+
+      // Call.
+      getNextToken();  // eat (
+      std::vector<ExprAST*> Args;
+      if (CurTok != ')') {
+        while (1) {
+          ExprAST *Arg = ParseExpression();
+          if (!Arg) return 0;
+          Args.push_back(Arg);
+
+          if (CurTok == ')') break;
+
+          if (CurTok != ',')
+            return Error("Expected ')' or ',' in argument list");
+          getNextToken();
+        }
+      }
+
+      // Eat the ')'.
+      getNextToken();
+
+      return new CallExprAST(IdName, Args);
+    }
+
+    /// numberexpr ::= number
+    static ExprAST *ParseNumberExpr() {
+      ExprAST *Result = new NumberExprAST(NumVal);
+      getNextToken(); // consume the number
+      return Result;
+    }
+
+    /// parenexpr ::= '(' expression ')'
+    static ExprAST *ParseParenExpr() {
+      getNextToken();  // eat (.
+      ExprAST *V = ParseExpression();
+      if (!V) return 0;
+
+      if (CurTok != ')')
+        return Error("expected ')'");
+      getNextToken();  // eat ).
+      return V;
+    }
+
+    /// primary
+    ///   ::= identifierexpr
+    ///   ::= numberexpr
+    ///   ::= parenexpr
+    static ExprAST *ParsePrimary() {
+      switch (CurTok) {
+      default: return Error("unknown token when expecting an expression");
+      case tok_identifier: return ParseIdentifierExpr();
+      case tok_number:     return ParseNumberExpr();
+      case '(':            return ParseParenExpr();
+      }
+    }
+
+    /// binoprhs
+    ///   ::= ('+' primary)*
+    static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+      // If this is a binop, find its precedence.
+      while (1) {
+        int TokPrec = GetTokPrecedence();
+
+        // If this is a binop that binds at least as tightly as the current binop,
+        // consume it, otherwise we are done.
+        if (TokPrec < ExprPrec)
+          return LHS;
+
+        // Okay, we know this is a binop.
+        int BinOp = CurTok;
+        getNextToken();  // eat binop
+
+        // Parse the primary expression after the binary operator.
+        ExprAST *RHS = ParsePrimary();
+        if (!RHS) return 0;
+
+        // If BinOp binds less tightly with RHS than the operator after RHS, let
+        // the pending operator take RHS as its LHS.
+        int NextPrec = GetTokPrecedence();
+        if (TokPrec < NextPrec) {
+          RHS = ParseBinOpRHS(TokPrec+1, RHS);
+          if (RHS == 0) return 0;
+        }
+
+        // Merge LHS/RHS.
+        LHS = new BinaryExprAST(BinOp, LHS, RHS);
+      }
+    }
+
+    /// expression
+    ///   ::= primary binoprhs
+    ///
+    static ExprAST *ParseExpression() {
+      ExprAST *LHS = ParsePrimary();
+      if (!LHS) return 0;
+
+      return ParseBinOpRHS(0, LHS);
+    }
+
+    /// prototype
+    ///   ::= id '(' id* ')'
+    static PrototypeAST *ParsePrototype() {
+      if (CurTok != tok_identifier)
+        return ErrorP("Expected function name in prototype");
+
+      std::string FnName = IdentifierStr;
+      getNextToken();
+
+      if (CurTok != '(')
+        return ErrorP("Expected '(' in prototype");
+
+      std::vector<std::string> ArgNames;
+      while (getNextToken() == tok_identifier)
+        ArgNames.push_back(IdentifierStr);
+      if (CurTok != ')')
+        return ErrorP("Expected ')' in prototype");
+
+      // success.
+      getNextToken();  // eat ')'.
+
+      return new PrototypeAST(FnName, ArgNames);
+    }
+
+    /// definition ::= 'def' prototype expression
+    static FunctionAST *ParseDefinition() {
+      getNextToken();  // eat def.
+      PrototypeAST *Proto = ParsePrototype();
+      if (Proto == 0) return 0;
+
+      if (ExprAST *E = ParseExpression())
+        return new FunctionAST(Proto, E);
+      return 0;
+    }
+
+    /// toplevelexpr ::= expression
+    static FunctionAST *ParseTopLevelExpr() {
+      if (ExprAST *E = ParseExpression()) {
+        // Make an anonymous proto.
+        PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+        return new FunctionAST(Proto, E);
+      }
+      return 0;
+    }
+
+    /// external ::= 'extern' prototype
+    static PrototypeAST *ParseExtern() {
+      getNextToken();  // eat extern.
+      return ParsePrototype();
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Top-Level parsing
+    //===----------------------------------------------------------------------===//
+
+    static void HandleDefinition() {
+      if (ParseDefinition()) {
+        fprintf(stderr, "Parsed a function definition.\n");
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleExtern() {
+      if (ParseExtern()) {
+        fprintf(stderr, "Parsed an extern\n");
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleTopLevelExpression() {
+      // Evaluate a top-level expression into an anonymous function.
+      if (ParseTopLevelExpr()) {
+        fprintf(stderr, "Parsed a top-level expr\n");
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    /// top ::= definition | external | expression | ';'
+    static void MainLoop() {
+      while (1) {
+        fprintf(stderr, "ready> ");
+        switch (CurTok) {
+        case tok_eof:    return;
+        case ';':        getNextToken(); break;  // ignore top-level semicolons.
+        case tok_def:    HandleDefinition(); break;
+        case tok_extern: HandleExtern(); break;
+        default:         HandleTopLevelExpression(); break;
+        }
+      }
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Main driver code.
+    //===----------------------------------------------------------------------===//
+
+    int main() {
+      // Install standard binary operators.
+      // 1 is lowest precedence.
+      BinopPrecedence['<'] = 10;
+      BinopPrecedence['+'] = 20;
+      BinopPrecedence['-'] = 20;
+      BinopPrecedence['*'] = 40;  // highest.
+
+      // Prime the first token.
+      fprintf(stderr, "ready> ");
+      getNextToken();
+
+      // Run the main "interpreter loop" now.
+      MainLoop();
+
+      return 0;
+    }
+
+`Next: Implementing Code Generation to LLVM IR <LangImpl3.html>`_
+
diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html
deleted file mode 100644
index 57ff7373f6..0000000000
--- a/docs/tutorial/LangImpl3.html
+++ /dev/null
@@ -1,1268 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Implementing code generation to LLVM IR</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Code generation to LLVM IR</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 3
-  <ol>
-    <li><a href="#intro">Chapter 3 Introduction</a></li>
-    <li><a href="#basics">Code Generation Setup</a></li>
-    <li><a href="#exprs">Expression Code Generation</a></li>
-    <li><a href="#funcs">Function Code Generation</a></li>
-    <li><a href="#driver">Driver Changes and Closing Thoughts</a></li>
-    <li><a href="#code">Full Code Listing</a></li>
-  </ol>
-</li>
-<li><a href="LangImpl4.html">Chapter 4</a>: Adding JIT and Optimizer 
-Support</li>
-</ul>
-
-<div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 3 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 3 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial.  This chapter shows you how to transform the <a 
-href="LangImpl2.html">Abstract Syntax Tree</a>, built in Chapter 2, into LLVM IR.
-This will teach you a little bit about how LLVM does things, as well as
-demonstrate how easy it is to use.  It's much more work to build a lexer and
-parser than it is to generate LLVM IR code. :)
-</p>
-
-<p><b>Please note</b>: the code in this chapter and later require LLVM 2.2 or
-later.  LLVM 2.1 and before will not work with it.  Also note that you need
-to use a version of this tutorial that matches your LLVM release: If you are
-using an official LLVM release, use the version of the documentation included
-with your release or on the <a href="http://llvm.org/releases/">llvm.org 
-releases page</a>.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="basics">Code Generation Setup</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-In order to generate LLVM IR, we want some simple setup to get started.  First
-we define virtual code generation (codegen) methods in each AST class:</p>
-
-<div class="doc_code">
-<pre>
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-  <b>virtual Value *Codegen() = 0;</b>
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-  <b>virtual Value *Codegen();</b>
-};
-...
-</pre>
-</div>
-
-<p>The Codegen() method says to emit IR for that AST node along with all the things it
-depends on, and they all return an LLVM Value object. 
-"Value" is the class used to represent a "<a 
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Static Single
-Assignment (SSA)</a> register" or "SSA value" in LLVM.  The most distinct aspect
-of SSA values is that their value is computed as the related instruction
-executes, and it does not get a new value until (and if) the instruction
-re-executes.  In other words, there is no way to "change" an SSA value.  For
-more information, please read up on <a 
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Static Single
-Assignment</a> - the concepts are really quite natural once you grok them.</p>
-
-<p>Note that instead of adding virtual methods to the ExprAST class hierarchy,
-it could also make sense to use a <a
-href="http://en.wikipedia.org/wiki/Visitor_pattern">visitor pattern</a> or some
-other way to model this.  Again, this tutorial won't dwell on good software
-engineering practices: for our purposes, adding a virtual method is
-simplest.</p>
-
-<p>The
-second thing we want is an "Error" method like we used for the parser, which will
-be used to report errors found during code generation (for example, use of an
-undeclared parameter):</p>
-
-<div class="doc_code">
-<pre>
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-static Module *TheModule;
-static IRBuilder&lt;&gt; Builder(getGlobalContext());
-static std::map&lt;std::string, Value*&gt; NamedValues;
-</pre>
-</div>
-
-<p>The static variables will be used during code generation.  <tt>TheModule</tt>
-is the LLVM construct that contains all of the functions and global variables in
-a chunk of code.  In many ways, it is the top-level structure that the LLVM IR
-uses to contain code.</p>
-
-<p>The <tt>Builder</tt> object is a helper object that makes it easy to generate
-LLVM instructions.  Instances of the <a 
-href="http://llvm.org/doxygen/IRBuilder_8h-source.html"><tt>IRBuilder</tt></a> 
-class template keep track of the current place to insert instructions and has
-methods to create new instructions.</p>
-
-<p>The <tt>NamedValues</tt> map keeps track of which values are defined in the
-current scope and what their LLVM representation is.  (In other words, it is a
-symbol table for the code).  In this form of Kaleidoscope, the only things that
-can be referenced are function parameters.  As such, function parameters will
-be in this map when generating code for their function body.</p>
-
-<p>
-With these basics in place, we can start talking about how to generate code for
-each expression.  Note that this assumes that the <tt>Builder</tt> has been set
-up to generate code <em>into</em> something.  For now, we'll assume that this
-has already been done, and we'll just use it to emit code.
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="exprs">Expression Code Generation</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Generating LLVM code for expression nodes is very straightforward: less
-than 45 lines of commented code for all four of our expression nodes.  First
-we'll do numeric literals:</p>
-
-<div class="doc_code">
-<pre>
-Value *NumberExprAST::Codegen() {
-  return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-</pre>
-</div>
-
-<p>In the LLVM IR, numeric constants are represented with the
-<tt>ConstantFP</tt> class, which holds the numeric value in an <tt>APFloat</tt>
-internally (<tt>APFloat</tt> has the capability of holding floating point
-constants of <em>A</em>rbitrary <em>P</em>recision).  This code basically just
-creates and returns a <tt>ConstantFP</tt>.  Note that in the LLVM IR
-that constants are all uniqued together and shared.  For this reason, the API
-uses the "foo::get(...)" idiom instead of "new foo(..)" or "foo::Create(..)".</p>
-
-<div class="doc_code">
-<pre>
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
-}
-</pre>
-</div>
-
-<p>References to variables are also quite simple using LLVM.  In the simple version
-of Kaleidoscope, we assume that the variable has already been emitted somewhere
-and its value is available.  In practice, the only values that can be in the
-<tt>NamedValues</tt> map are function arguments.  This
-code simply checks to see that the specified name is in the map (if not, an 
-unknown variable is being referenced) and returns the value for it.  In future
-chapters, we'll add support for <a href="LangImpl5.html#for">loop induction 
-variables</a> in the symbol table, and for <a 
-href="LangImpl7.html#localvars">local variables</a>.</p>
-
-<div class="doc_code">
-<pre>
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS-&gt;Codegen();
-  Value *R = RHS-&gt;Codegen();
-  if (L == 0 || R == 0) return 0;
-  
-  switch (Op) {
-  case '+': return Builder.CreateFAdd(L, R, "addtmp");
-  case '-': return Builder.CreateFSub(L, R, "subtmp");
-  case '*': return Builder.CreateFMul(L, R, "multmp");
-  case '&lt;':
-    L = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  default: return ErrorV("invalid binary operator");
-  }
-}
-</pre>
-</div>
-
-<p>Binary operators start to get more interesting.  The basic idea here is that
-we recursively emit code for the left-hand side of the expression, then the 
-right-hand side, then we compute the result of the binary expression.  In this
-code, we do a simple switch on the opcode to create the right LLVM instruction.
-</p>
-
-<p>In the example above, the LLVM builder class is starting to show its value.  
-IRBuilder knows where to insert the newly created instruction, all you have to
-do is specify what instruction to create (e.g. with <tt>CreateFAdd</tt>), which
-operands to use (<tt>L</tt> and <tt>R</tt> here) and optionally provide a name
-for the generated instruction.</p>
-
-<p>One nice thing about LLVM is that the name is just a hint.  For instance, if
-the code above emits multiple "addtmp" variables, LLVM will automatically
-provide each one with an increasing, unique numeric suffix.  Local value names
-for instructions are purely optional, but it makes it much easier to read the
-IR dumps.</p>
-
-<p><a href="../LangRef.html#instref">LLVM instructions</a> are constrained by
-strict rules: for example, the Left and Right operators of
-an <a href="../LangRef.html#i_add">add instruction</a> must have the same
-type, and the result type of the add must match the operand types.  Because
-all values in Kaleidoscope are doubles, this makes for very simple code for add,
-sub and mul.</p>
-
-<p>On the other hand, LLVM specifies that the <a 
-href="../LangRef.html#i_fcmp">fcmp instruction</a> always returns an 'i1' value
-(a one bit integer).  The problem with this is that Kaleidoscope wants the value to be a 0.0 or 1.0 value.  In order to get these semantics, we combine the fcmp instruction with
-a <a href="../LangRef.html#i_uitofp">uitofp instruction</a>.  This instruction
-converts its input integer into a floating point value by treating the input
-as an unsigned value.  In contrast, if we used the <a 
-href="../LangRef.html#i_sitofp">sitofp instruction</a>, the Kaleidoscope '&lt;'
-operator would return 0.0 and -1.0, depending on the input value.</p>
-
-<div class="doc_code">
-<pre>
-Value *CallExprAST::Codegen() {
-  // Look up the name in the global module table.
-  Function *CalleeF = TheModule-&gt;getFunction(Callee);
-  if (CalleeF == 0)
-    return ErrorV("Unknown function referenced");
-  
-  // If argument mismatch error.
-  if (CalleeF-&gt;arg_size() != Args.size())
-    return ErrorV("Incorrect # arguments passed");
-
-  std::vector&lt;Value*&gt; ArgsV;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]-&gt;Codegen());
-    if (ArgsV.back() == 0) return 0;
-  }
-  
-  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-</pre>
-</div>
-
-<p>Code generation for function calls is quite straightforward with LLVM.  The
-code above initially does a function name lookup in the LLVM Module's symbol
-table.  Recall that the LLVM Module is the container that holds all of the
-functions we are JIT'ing.  By giving each function the same name as what the
-user specifies, we can use the LLVM symbol table to resolve function names for
-us.</p>
-
-<p>Once we have the function to call, we recursively codegen each argument that
-is to be passed in, and create an LLVM <a href="../LangRef.html#i_call">call
-instruction</a>.  Note that LLVM uses the native C calling conventions by
-default, allowing these calls to also call into standard library functions like
-"sin" and "cos", with no additional effort.</p>
-
-<p>This wraps up our handling of the four basic expressions that we have so far
-in Kaleidoscope.  Feel free to go in and add some more.  For example, by 
-browsing the <a href="../LangRef.html">LLVM language reference</a> you'll find
-several other interesting instructions that are really easy to plug into our
-basic framework.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="funcs">Function Code Generation</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Code generation for prototypes and functions must handle a number of
-details, which make their code less beautiful than expression code
-generation, but allows us to  illustrate some important points.  First, lets
-talk about code generation for prototypes: they are used both for function 
-bodies and external function declarations.  The code starts with:</p>
-
-<div class="doc_code">
-<pre>
-Function *PrototypeAST::Codegen() {
-  // Make the function type:  double(double,double) etc.
-  std::vector&lt;Type*&gt; Doubles(Args.size(),
-                             Type::getDoubleTy(getGlobalContext()));
-  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
-                                       Doubles, false);
-
-  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-</pre>
-</div>
-
-<p>This code packs a lot of power into a few lines.  Note first that this 
-function returns a "Function*" instead of a "Value*".  Because a "prototype"
-really talks about the external interface for a function (not the value computed
-by an expression), it makes sense for it to return the LLVM Function it
-corresponds to when codegen'd.</p>
-
-<p>The call to <tt>FunctionType::get</tt> creates
-the <tt>FunctionType</tt> that should be used for a given Prototype.  Since all
-function arguments in Kaleidoscope are of type double, the first line creates
-a vector of "N" LLVM double types.  It then uses the <tt>Functiontype::get</tt>
-method to create a function type that takes "N" doubles as arguments, returns
-one double as a result, and that is not vararg (the false parameter indicates
-this).  Note that Types in LLVM are uniqued just like Constants are, so you
-don't "new" a type, you "get" it.</p>
-
-<p>The final line above actually creates the function that the prototype will
-correspond to.  This indicates the type, linkage and name to use, as well as which
-module to insert into.  "<a href="../LangRef.html#linkage">external linkage</a>"
-means that the function may be defined outside the current module and/or that it
-is callable by functions outside the module.  The Name passed in is the name the
-user specified: since "<tt>TheModule</tt>" is specified, this name is registered
-in "<tt>TheModule</tt>"s symbol table, which is used by the function call code
-above.</p>
-
-<div class="doc_code">
-<pre>
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F-&gt;getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F-&gt;eraseFromParent();
-    F = TheModule-&gt;getFunction(Name);
-</pre>
-</div>
-
-<p>The Module symbol table works just like the Function symbol table when it
-comes to name conflicts: if a new function is created with a name that was previously
-added to the symbol table, the new function will get implicitly renamed when added to the
-Module.  The code above exploits this fact to determine if there was a previous
-definition of this function.</p>
-
-<p>In Kaleidoscope, I choose to allow redefinitions of functions in two cases:
-first, we want to allow 'extern'ing a function more than once, as long as the
-prototypes for the externs match (since all arguments have the same type, we
-just have to check that the number of arguments match).  Second, we want to
-allow 'extern'ing a function and then defining a body for it.  This is useful
-when defining mutually recursive functions.</p>
-
-<p>In order to implement this, the code above first checks to see if there is
-a collision on the name of the function.  If so, it deletes the function we just
-created (by calling <tt>eraseFromParent</tt>) and then calling 
-<tt>getFunction</tt> to get the existing function with the specified name.  Note
-that many APIs in LLVM have "erase" forms and "remove" forms.  The "remove" form
-unlinks the object from its parent (e.g. a Function from a Module) and returns
-it.  The "erase" form unlinks the object and then deletes it.</p>
-   
-<div class="doc_code">
-<pre>
-    // If F already has a body, reject this.
-    if (!F-&gt;empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-    
-    // If F took a different number of args, reject.
-    if (F-&gt;arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
-</pre>
-</div>
-
-<p>In order to verify the logic above, we first check to see if the pre-existing
-function is "empty".  In this case, empty means that it has no basic blocks in
-it, which means it has no body.  If it has no body, it is a forward 
-declaration.  Since we don't allow anything after a full definition of the
-function, the code rejects this case.  If the previous reference to a function
-was an 'extern', we simply verify that the number of arguments for that
-definition and this one match up.  If not, we emit an error.</p>
-
-<div class="doc_code">
-<pre>
-  // Set names for all arguments.
-  unsigned Idx = 0;
-  for (Function::arg_iterator AI = F-&gt;arg_begin(); Idx != Args.size();
-       ++AI, ++Idx) {
-    AI-&gt;setName(Args[Idx]);
-    
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = AI;
-  }
-  return F;
-}
-</pre>
-</div>
-
-<p>The last bit of code for prototypes loops over all of the arguments in the
-function, setting the name of the LLVM Argument objects to match, and registering
-the arguments in the <tt>NamedValues</tt> map for future use by the
-<tt>VariableExprAST</tt> AST node.  Once this is set up, it returns the Function
-object to the caller.  Note that we don't check for conflicting 
-argument names here (e.g. "extern foo(a b a)").  Doing so would be very
-straight-forward with the mechanics we have already used above.</p>
-
-<div class="doc_code">
-<pre>
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto-&gt;Codegen();
-  if (TheFunction == 0)
-    return 0;
-</pre>
-</div>
-
-<p>Code generation for function definitions starts out simply enough: we just
-codegen the prototype (Proto) and verify that it is ok.  We then clear out the
-<tt>NamedValues</tt> map to make sure that there isn't anything in it from the
-last function we compiled.  Code generation of the prototype ensures that there
-is an LLVM Function object that is ready to go for us.</p>
-
-<div class="doc_code">
-<pre>
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  if (Value *RetVal = Body-&gt;Codegen()) {
-</pre>
-</div>
-
-<p>Now we get to the point where the <tt>Builder</tt> is set up.  The first
-line creates a new <a href="http://en.wikipedia.org/wiki/Basic_block">basic
-block</a> (named "entry"), which is inserted into <tt>TheFunction</tt>.  The
-second line then tells the builder that new instructions should be inserted into
-the end of the new basic block.  Basic blocks in LLVM are an important part
-of functions that define the <a 
-href="http://en.wikipedia.org/wiki/Control_flow_graph">Control Flow Graph</a>.
-Since we don't have any control flow, our functions will only contain one 
-block at this point.  We'll fix this in <a href="LangImpl5.html">Chapter 5</a> :).</p>
-
-<div class="doc_code">
-<pre>
-  if (Value *RetVal = Body-&gt;Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    return TheFunction;
-  }
-</pre>
-</div>
-
-<p>Once the insertion point is set up, we call the <tt>CodeGen()</tt> method for
-the root expression of the function.  If no error happens, this emits code to
-compute the expression into the entry block and returns the value that was
-computed.  Assuming no error, we then create an LLVM <a 
-href="../LangRef.html#i_ret">ret instruction</a>, which completes the function.
-Once the function is built, we call <tt>verifyFunction</tt>, which
-is provided by LLVM.  This function does a variety of consistency checks on the
-generated code, to determine if our compiler is doing everything right.  Using
-this is important: it can catch a lot of bugs.  Once the function is finished
-and validated, we return it.</p>
-  
-<div class="doc_code">
-<pre>
-  // Error reading body, remove function.
-  TheFunction-&gt;eraseFromParent();
-  return 0;
-}
-</pre>
-</div>
-
-<p>The only piece left here is handling of the error case.  For simplicity, we
-handle this by merely deleting the function we produced with the 
-<tt>eraseFromParent</tt> method.  This allows the user to redefine a function
-that they incorrectly typed in before: if we didn't delete it, it would live in
-the symbol table, with a body, preventing future redefinition.</p>
-
-<p>This code does have a bug, though.  Since the <tt>PrototypeAST::Codegen</tt>
-can return a previously defined forward declaration, our code can actually delete
-a forward declaration.  There are a number of ways to fix this bug, see what you
-can come up with!  Here is a testcase:</p>
-
-<div class="doc_code">
-<pre>
-extern foo(a b);     # ok, defines foo.
-def foo(a b) c;      # error, 'c' is invalid.
-def bar() foo(1, 2); # error, unknown function "foo"
-</pre>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="driver">Driver Changes and Closing Thoughts</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-For now, code generation to LLVM doesn't really get us much, except that we can
-look at the pretty IR calls.  The sample code inserts calls to Codegen into the
-"<tt>HandleDefinition</tt>", "<tt>HandleExtern</tt>" etc functions, and then
-dumps out the LLVM IR.  This gives a nice way to look at the LLVM IR for simple
-functions.  For example:
-</p>
-
-<div class="doc_code">
-<pre>
-ready> <b>4+5</b>;
-Read top-level expression:
-define double @0() {
-entry:
-  ret double 9.000000e+00
-}
-</pre>
-</div>
-
-<p>Note how the parser turns the top-level expression into anonymous functions
-for us.  This will be handy when we add <a href="LangImpl4.html#jit">JIT 
-support</a> in the next chapter.  Also note that the code is very literally
-transcribed, no optimizations are being performed except simple constant
-folding done by IRBuilder.  We will 
-<a href="LangImpl4.html#trivialconstfold">add optimizations</a> explicitly in
-the next chapter.</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def foo(a b) a*a + 2*a*b + b*b;</b>
-Read function definition:
-define double @foo(double %a, double %b) {
-entry:
-  %multmp = fmul double %a, %a
-  %multmp1 = fmul double 2.000000e+00, %a
-  %multmp2 = fmul double %multmp1, %b
-  %addtmp = fadd double %multmp, %multmp2
-  %multmp3 = fmul double %b, %b
-  %addtmp4 = fadd double %addtmp, %multmp3
-  ret double %addtmp4
-}
-</pre>
-</div>
-
-<p>This shows some simple arithmetic. Notice the striking similarity to the
-LLVM builder calls that we use to create the instructions.</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def bar(a) foo(a, 4.0) + bar(31337);</b>
-Read function definition:
-define double @bar(double %a) {
-entry:
-  %calltmp = call double @foo(double %a, double 4.000000e+00)
-  %calltmp1 = call double @bar(double 3.133700e+04)
-  %addtmp = fadd double %calltmp, %calltmp1
-  ret double %addtmp
-}
-</pre>
-</div>
-
-<p>This shows some function calls.  Note that this function will take a long
-time to execute if you call it.  In the future we'll add conditional control 
-flow to actually make recursion useful :).</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>extern cos(x);</b>
-Read extern: 
-declare double @cos(double)
-
-ready&gt; <b>cos(1.234);</b>
-Read top-level expression:
-define double @1() {
-entry:
-  %calltmp = call double @cos(double 1.234000e+00)
-  ret double %calltmp
-}
-</pre>
-</div>
-
-<p>This shows an extern for the libm "cos" function, and a call to it.</p>
-
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>^D</b>
-; ModuleID = 'my cool jit'
-
-define double @0() {
-entry:
-  %addtmp = fadd double 4.000000e+00, 5.000000e+00
-  ret double %addtmp
-}
-
-define double @foo(double %a, double %b) {
-entry:
-  %multmp = fmul double %a, %a
-  %multmp1 = fmul double 2.000000e+00, %a
-  %multmp2 = fmul double %multmp1, %b
-  %addtmp = fadd double %multmp, %multmp2
-  %multmp3 = fmul double %b, %b
-  %addtmp4 = fadd double %addtmp, %multmp3
-  ret double %addtmp4
-}
-
-define double @bar(double %a) {
-entry:
-  %calltmp = call double @foo(double %a, double 4.000000e+00)
-  %calltmp1 = call double @bar(double 3.133700e+04)
-  %addtmp = fadd double %calltmp, %calltmp1
-  ret double %addtmp
-}
-
-declare double @cos(double)
-
-define double @1() {
-entry:
-  %calltmp = call double @cos(double 1.234000e+00)
-  ret double %calltmp
-}
-</pre>
-</div>
-
-<p>When you quit the current demo, it dumps out the IR for the entire module
-generated.  Here you can see the big picture with all the functions referencing
-each other.</p>
-
-<p>This wraps up the third chapter of the Kaleidoscope tutorial.  Up next, we'll
-describe how to <a href="LangImpl4.html">add JIT codegen and optimizer
-support</a> to this so we can actually start running code!</p>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-LLVM code generator.    Because this uses the LLVM libraries, we need to link
-them in.  To do this, we use the <a 
-href="http://llvm.org/cmds/llvm-config.html">llvm-config</a> tool to inform
-our makefile/command line about which options to use:</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-clang++ -g -O3 toy.cpp `llvm-config --cppflags --ldflags --libs core` -o toy
-# Run
-./toy
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<div class="doc_code">
-<pre>
-// To build this:
-// See example below.
-
-#include "llvm/DerivedTypes.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Analysis/Verifier.h"
-#include &lt;cstdio&gt;
-#include &lt;string&gt;
-#include &lt;map&gt;
-#include &lt;vector&gt;
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    return tok_identifier;
-  }
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-  
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-  virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &amp;name) : Name(name) {}
-  virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-  virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector&lt;ExprAST*&gt; Args;
-public:
-  CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
-    : Callee(callee), Args(args) {}
-  virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
-  std::string Name;
-  std::vector&lt;std::string&gt; Args;
-public:
-  PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args)
-    : Name(name), Args(args) {}
-  
-  Function *Codegen();
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-  
-  Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-  
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec &lt;= 0) return -1;
-  return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector&lt;ExprAST*&gt; Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  }
-}
-
-/// binoprhs
-///   ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec &lt; ExprPrec)
-      return LHS;
-    
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the primary expression after the binary operator.
-    ExprAST *RHS = ParsePrimary();
-    if (!RHS) return 0;
-    
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec &lt; NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }
-}
-
-/// expression
-///   ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParsePrimary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-///   ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
-  if (CurTok != tok_identifier)
-    return ErrorP("Expected function name in prototype");
-
-  std::string FnName = IdentifierStr;
-  getNextToken();
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  std::vector&lt;std::string&gt; ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  return new PrototypeAST(FnName, ArgNames);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder&lt;&gt; Builder(getGlobalContext());
-static std::map&lt;std::string, Value*&gt; NamedValues;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-Value *NumberExprAST::Codegen() {
-  return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
-}
-
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS-&gt;Codegen();
-  Value *R = RHS-&gt;Codegen();
-  if (L == 0 || R == 0) return 0;
-  
-  switch (Op) {
-  case '+': return Builder.CreateFAdd(L, R, "addtmp");
-  case '-': return Builder.CreateFSub(L, R, "subtmp");
-  case '*': return Builder.CreateFMul(L, R, "multmp");
-  case '&lt;':
-    L = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  default: return ErrorV("invalid binary operator");
-  }
-}
-
-Value *CallExprAST::Codegen() {
-  // Look up the name in the global module table.
-  Function *CalleeF = TheModule-&gt;getFunction(Callee);
-  if (CalleeF == 0)
-    return ErrorV("Unknown function referenced");
-  
-  // If argument mismatch error.
-  if (CalleeF-&gt;arg_size() != Args.size())
-    return ErrorV("Incorrect # arguments passed");
-
-  std::vector&lt;Value*&gt; ArgsV;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]-&gt;Codegen());
-    if (ArgsV.back() == 0) return 0;
-  }
-  
-  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Function *PrototypeAST::Codegen() {
-  // Make the function type:  double(double,double) etc.
-  std::vector&lt;Type*&gt; Doubles(Args.size(),
-                             Type::getDoubleTy(getGlobalContext()));
-  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
-                                       Doubles, false);
-  
-  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-  
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F-&gt;getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F-&gt;eraseFromParent();
-    F = TheModule-&gt;getFunction(Name);
-    
-    // If F already has a body, reject this.
-    if (!F-&gt;empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-    
-    // If F took a different number of args, reject.
-    if (F-&gt;arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
-  
-  // Set names for all arguments.
-  unsigned Idx = 0;
-  for (Function::arg_iterator AI = F-&gt;arg_begin(); Idx != Args.size();
-       ++AI, ++Idx) {
-    AI-&gt;setName(Args[Idx]);
-    
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = AI;
-  }
-  
-  return F;
-}
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto-&gt;Codegen();
-  if (TheFunction == 0)
-    return 0;
-  
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  if (Value *RetVal = Body-&gt;Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    return TheFunction;
-  }
-  
-  // Error reading body, remove function.
-  TheFunction-&gt;eraseFromParent();
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F-&gt;Codegen()) {
-      fprintf(stderr, "Read function definition:");
-      LF-&gt;dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P-&gt;Codegen()) {
-      fprintf(stderr, "Read extern: ");
-      F-&gt;dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F-&gt;Codegen()) {
-      fprintf(stderr, "Read top-level expression:");
-      LF-&gt;dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready&gt; ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C" 
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
-  LLVMContext &amp;Context = getGlobalContext();
-
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['&lt;'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-
-  // Prime the first token.
-  fprintf(stderr, "ready&gt; ");
-  getNextToken();
-
-  // Make the module, which holds all the code.
-  TheModule = new Module("my cool jit", Context);
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-
-  // Print out all of the generated code.
-  TheModule-&gt;dump();
-
-  return 0;
-}
-</pre>
-</div>
-<a href="LangImpl4.html">Next: Adding JIT and Optimizer Support</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl3.rst b/docs/tutorial/LangImpl3.rst
new file mode 100644
index 0000000000..01935a443b
--- /dev/null
+++ b/docs/tutorial/LangImpl3.rst
@@ -0,0 +1,1162 @@
+========================================
+Kaleidoscope: Code generation to LLVM IR
+========================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_
+
+Chapter 3 Introduction
+======================
+
+Welcome to Chapter 3 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. This chapter shows you how to transform
+the `Abstract Syntax Tree <LangImpl2.html>`_, built in Chapter 2, into
+LLVM IR. This will teach you a little bit about how LLVM does things, as
+well as demonstrate how easy it is to use. It's much more work to build
+a lexer and parser than it is to generate LLVM IR code. :)
+
+**Please note**: the code in this chapter and later require LLVM 2.2 or
+later. LLVM 2.1 and before will not work with it. Also note that you
+need to use a version of this tutorial that matches your LLVM release:
+If you are using an official LLVM release, use the version of the
+documentation included with your release or on the `llvm.org releases
+page <http://llvm.org/releases/>`_.
+
+Code Generation Setup
+=====================
+
+In order to generate LLVM IR, we want some simple setup to get started.
+First we define virtual code generation (codegen) methods in each AST
+class:
+
+.. code-block:: c++
+
+    /// ExprAST - Base class for all expression nodes.
+    class ExprAST {
+    public:
+      virtual ~ExprAST() {}
+      virtual Value *Codegen() = 0;
+    };
+
+    /// NumberExprAST - Expression class for numeric literals like "1.0".
+    class NumberExprAST : public ExprAST {
+      double Val;
+    public:
+      NumberExprAST(double val) : Val(val) {}
+      virtual Value *Codegen();
+    };
+    ...
+
+The Codegen() method says to emit IR for that AST node along with all
+the things it depends on, and they all return an LLVM Value object.
+"Value" is the class used to represent a "`Static Single Assignment
+(SSA) <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_
+register" or "SSA value" in LLVM. The most distinct aspect of SSA values
+is that their value is computed as the related instruction executes, and
+it does not get a new value until (and if) the instruction re-executes.
+In other words, there is no way to "change" an SSA value. For more
+information, please read up on `Static Single
+Assignment <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_
+- the concepts are really quite natural once you grok them.
+
+Note that instead of adding virtual methods to the ExprAST class
+hierarchy, it could also make sense to use a `visitor
+pattern <http://en.wikipedia.org/wiki/Visitor_pattern>`_ or some other
+way to model this. Again, this tutorial won't dwell on good software
+engineering practices: for our purposes, adding a virtual method is
+simplest.
+
+The second thing we want is an "Error" method like we used for the
+parser, which will be used to report errors found during code generation
+(for example, use of an undeclared parameter):
+
+.. code-block:: c++
+
+    Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+    static Module *TheModule;
+    static IRBuilder<> Builder(getGlobalContext());
+    static std::map<std::string, Value*> NamedValues;
+
+The static variables will be used during code generation. ``TheModule``
+is the LLVM construct that contains all of the functions and global
+variables in a chunk of code. In many ways, it is the top-level
+structure that the LLVM IR uses to contain code.
+
+The ``Builder`` object is a helper object that makes it easy to generate
+LLVM instructions. Instances of the
+```IRBuilder`` <http://llvm.org/doxygen/IRBuilder_8h-source.html>`_
+class template keep track of the current place to insert instructions
+and has methods to create new instructions.
+
+The ``NamedValues`` map keeps track of which values are defined in the
+current scope and what their LLVM representation is. (In other words, it
+is a symbol table for the code). In this form of Kaleidoscope, the only
+things that can be referenced are function parameters. As such, function
+parameters will be in this map when generating code for their function
+body.
+
+With these basics in place, we can start talking about how to generate
+code for each expression. Note that this assumes that the ``Builder``
+has been set up to generate code *into* something. For now, we'll assume
+that this has already been done, and we'll just use it to emit code.
+
+Expression Code Generation
+==========================
+
+Generating LLVM code for expression nodes is very straightforward: less
+than 45 lines of commented code for all four of our expression nodes.
+First we'll do numeric literals:
+
+.. code-block:: c++
+
+    Value *NumberExprAST::Codegen() {
+      return ConstantFP::get(getGlobalContext(), APFloat(Val));
+    }
+
+In the LLVM IR, numeric constants are represented with the
+``ConstantFP`` class, which holds the numeric value in an ``APFloat``
+internally (``APFloat`` has the capability of holding floating point
+constants of Arbitrary Precision). This code basically just creates
+and returns a ``ConstantFP``. Note that in the LLVM IR that constants
+are all uniqued together and shared. For this reason, the API uses the
+"foo::get(...)" idiom instead of "new foo(..)" or "foo::Create(..)".
+
+.. code-block:: c++
+
+    Value *VariableExprAST::Codegen() {
+      // Look this variable up in the function.
+      Value *V = NamedValues[Name];
+      return V ? V : ErrorV("Unknown variable name");
+    }
+
+References to variables are also quite simple using LLVM. In the simple
+version of Kaleidoscope, we assume that the variable has already been
+emitted somewhere and its value is available. In practice, the only
+values that can be in the ``NamedValues`` map are function arguments.
+This code simply checks to see that the specified name is in the map (if
+not, an unknown variable is being referenced) and returns the value for
+it. In future chapters, we'll add support for `loop induction
+variables <LangImpl5.html#for>`_ in the symbol table, and for `local
+variables <LangImpl7.html#localvars>`_.
+
+.. code-block:: c++
+
+    Value *BinaryExprAST::Codegen() {
+      Value *L = LHS->Codegen();
+      Value *R = RHS->Codegen();
+      if (L == 0 || R == 0) return 0;
+
+      switch (Op) {
+      case '+': return Builder.CreateFAdd(L, R, "addtmp");
+      case '-': return Builder.CreateFSub(L, R, "subtmp");
+      case '*': return Builder.CreateFMul(L, R, "multmp");
+      case '<':
+        L = Builder.CreateFCmpULT(L, R, "cmptmp");
+        // Convert bool 0/1 to double 0.0 or 1.0
+        return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                    "booltmp");
+      default: return ErrorV("invalid binary operator");
+      }
+    }
+
+Binary operators start to get more interesting. The basic idea here is
+that we recursively emit code for the left-hand side of the expression,
+then the right-hand side, then we compute the result of the binary
+expression. In this code, we do a simple switch on the opcode to create
+the right LLVM instruction.
+
+In the example above, the LLVM builder class is starting to show its
+value. IRBuilder knows where to insert the newly created instruction,
+all you have to do is specify what instruction to create (e.g. with
+``CreateFAdd``), which operands to use (``L`` and ``R`` here) and
+optionally provide a name for the generated instruction.
+
+One nice thing about LLVM is that the name is just a hint. For instance,
+if the code above emits multiple "addtmp" variables, LLVM will
+automatically provide each one with an increasing, unique numeric
+suffix. Local value names for instructions are purely optional, but it
+makes it much easier to read the IR dumps.
+
+`LLVM instructions <../LangRef.html#instref>`_ are constrained by strict
+rules: for example, the Left and Right operators of an `add
+instruction <../LangRef.html#i_add>`_ must have the same type, and the
+result type of the add must match the operand types. Because all values
+in Kaleidoscope are doubles, this makes for very simple code for add,
+sub and mul.
+
+On the other hand, LLVM specifies that the `fcmp
+instruction <../LangRef.html#i_fcmp>`_ always returns an 'i1' value (a
+one bit integer). The problem with this is that Kaleidoscope wants the
+value to be a 0.0 or 1.0 value. In order to get these semantics, we
+combine the fcmp instruction with a `uitofp
+instruction <../LangRef.html#i_uitofp>`_. This instruction converts its
+input integer into a floating point value by treating the input as an
+unsigned value. In contrast, if we used the `sitofp
+instruction <../LangRef.html#i_sitofp>`_, the Kaleidoscope '<' operator
+would return 0.0 and -1.0, depending on the input value.
+
+.. code-block:: c++
+
+    Value *CallExprAST::Codegen() {
+      // Look up the name in the global module table.
+      Function *CalleeF = TheModule->getFunction(Callee);
+      if (CalleeF == 0)
+        return ErrorV("Unknown function referenced");
+
+      // If argument mismatch error.
+      if (CalleeF->arg_size() != Args.size())
+        return ErrorV("Incorrect # arguments passed");
+
+      std::vector<Value*> ArgsV;
+      for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+        ArgsV.push_back(Args[i]->Codegen());
+        if (ArgsV.back() == 0) return 0;
+      }
+
+      return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+    }
+
+Code generation for function calls is quite straightforward with LLVM.
+The code above initially does a function name lookup in the LLVM
+Module's symbol table. Recall that the LLVM Module is the container that
+holds all of the functions we are JIT'ing. By giving each function the
+same name as what the user specifies, we can use the LLVM symbol table
+to resolve function names for us.
+
+Once we have the function to call, we recursively codegen each argument
+that is to be passed in, and create an LLVM `call
+instruction <../LangRef.html#i_call>`_. Note that LLVM uses the native C
+calling conventions by default, allowing these calls to also call into
+standard library functions like "sin" and "cos", with no additional
+effort.
+
+This wraps up our handling of the four basic expressions that we have so
+far in Kaleidoscope. Feel free to go in and add some more. For example,
+by browsing the `LLVM language reference <../LangRef.html>`_ you'll find
+several other interesting instructions that are really easy to plug into
+our basic framework.
+
+Function Code Generation
+========================
+
+Code generation for prototypes and functions must handle a number of
+details, which make their code less beautiful than expression code
+generation, but allows us to illustrate some important points. First,
+lets talk about code generation for prototypes: they are used both for
+function bodies and external function declarations. The code starts
+with:
+
+.. code-block:: c++
+
+    Function *PrototypeAST::Codegen() {
+      // Make the function type:  double(double,double) etc.
+      std::vector<Type*> Doubles(Args.size(),
+                                 Type::getDoubleTy(getGlobalContext()));
+      FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                           Doubles, false);
+
+      Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+This code packs a lot of power into a few lines. Note first that this
+function returns a "Function\*" instead of a "Value\*". Because a
+"prototype" really talks about the external interface for a function
+(not the value computed by an expression), it makes sense for it to
+return the LLVM Function it corresponds to when codegen'd.
+
+The call to ``FunctionType::get`` creates the ``FunctionType`` that
+should be used for a given Prototype. Since all function arguments in
+Kaleidoscope are of type double, the first line creates a vector of "N"
+LLVM double types. It then uses the ``Functiontype::get`` method to
+create a function type that takes "N" doubles as arguments, returns one
+double as a result, and that is not vararg (the false parameter
+indicates this). Note that Types in LLVM are uniqued just like Constants
+are, so you don't "new" a type, you "get" it.
+
+The final line above actually creates the function that the prototype
+will correspond to. This indicates the type, linkage and name to use, as
+well as which module to insert into. "`external
+linkage <../LangRef.html#linkage>`_" means that the function may be
+defined outside the current module and/or that it is callable by
+functions outside the module. The Name passed in is the name the user
+specified: since "``TheModule``" is specified, this name is registered
+in "``TheModule``"s symbol table, which is used by the function call
+code above.
+
+.. code-block:: c++
+
+      // If F conflicted, there was already something named 'Name'.  If it has a
+      // body, don't allow redefinition or reextern.
+      if (F->getName() != Name) {
+        // Delete the one we just made and get the existing one.
+        F->eraseFromParent();
+        F = TheModule->getFunction(Name);
+
+The Module symbol table works just like the Function symbol table when
+it comes to name conflicts: if a new function is created with a name
+that was previously added to the symbol table, the new function will get
+implicitly renamed when added to the Module. The code above exploits
+this fact to determine if there was a previous definition of this
+function.
+
+In Kaleidoscope, I choose to allow redefinitions of functions in two
+cases: first, we want to allow 'extern'ing a function more than once, as
+long as the prototypes for the externs match (since all arguments have
+the same type, we just have to check that the number of arguments
+match). Second, we want to allow 'extern'ing a function and then
+defining a body for it. This is useful when defining mutually recursive
+functions.
+
+In order to implement this, the code above first checks to see if there
+is a collision on the name of the function. If so, it deletes the
+function we just created (by calling ``eraseFromParent``) and then
+calling ``getFunction`` to get the existing function with the specified
+name. Note that many APIs in LLVM have "erase" forms and "remove" forms.
+The "remove" form unlinks the object from its parent (e.g. a Function
+from a Module) and returns it. The "erase" form unlinks the object and
+then deletes it.
+
+.. code-block:: c++
+
+        // If F already has a body, reject this.
+        if (!F->empty()) {
+          ErrorF("redefinition of function");
+          return 0;
+        }
+
+        // If F took a different number of args, reject.
+        if (F->arg_size() != Args.size()) {
+          ErrorF("redefinition of function with different # args");
+          return 0;
+        }
+      }
+
+In order to verify the logic above, we first check to see if the
+pre-existing function is "empty". In this case, empty means that it has
+no basic blocks in it, which means it has no body. If it has no body, it
+is a forward declaration. Since we don't allow anything after a full
+definition of the function, the code rejects this case. If the previous
+reference to a function was an 'extern', we simply verify that the
+number of arguments for that definition and this one match up. If not,
+we emit an error.
+
+.. code-block:: c++
+
+      // Set names for all arguments.
+      unsigned Idx = 0;
+      for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+           ++AI, ++Idx) {
+        AI->setName(Args[Idx]);
+
+        // Add arguments to variable symbol table.
+        NamedValues[Args[Idx]] = AI;
+      }
+      return F;
+    }
+
+The last bit of code for prototypes loops over all of the arguments in
+the function, setting the name of the LLVM Argument objects to match,
+and registering the arguments in the ``NamedValues`` map for future use
+by the ``VariableExprAST`` AST node. Once this is set up, it returns the
+Function object to the caller. Note that we don't check for conflicting
+argument names here (e.g. "extern foo(a b a)"). Doing so would be very
+straight-forward with the mechanics we have already used above.
+
+.. code-block:: c++
+
+    Function *FunctionAST::Codegen() {
+      NamedValues.clear();
+
+      Function *TheFunction = Proto->Codegen();
+      if (TheFunction == 0)
+        return 0;
+
+Code generation for function definitions starts out simply enough: we
+just codegen the prototype (Proto) and verify that it is ok. We then
+clear out the ``NamedValues`` map to make sure that there isn't anything
+in it from the last function we compiled. Code generation of the
+prototype ensures that there is an LLVM Function object that is ready to
+go for us.
+
+.. code-block:: c++
+
+      // Create a new basic block to start insertion into.
+      BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+      Builder.SetInsertPoint(BB);
+
+      if (Value *RetVal = Body->Codegen()) {
+
+Now we get to the point where the ``Builder`` is set up. The first line
+creates a new `basic block <http://en.wikipedia.org/wiki/Basic_block>`_
+(named "entry"), which is inserted into ``TheFunction``. The second line
+then tells the builder that new instructions should be inserted into the
+end of the new basic block. Basic blocks in LLVM are an important part
+of functions that define the `Control Flow
+Graph <http://en.wikipedia.org/wiki/Control_flow_graph>`_. Since we
+don't have any control flow, our functions will only contain one block
+at this point. We'll fix this in `Chapter 5 <LangImpl5.html>`_ :).
+
+.. code-block:: c++
+
+      if (Value *RetVal = Body->Codegen()) {
+        // Finish off the function.
+        Builder.CreateRet(RetVal);
+
+        // Validate the generated code, checking for consistency.
+        verifyFunction(*TheFunction);
+
+        return TheFunction;
+      }
+
+Once the insertion point is set up, we call the ``CodeGen()`` method for
+the root expression of the function. If no error happens, this emits
+code to compute the expression into the entry block and returns the
+value that was computed. Assuming no error, we then create an LLVM `ret
+instruction <../LangRef.html#i_ret>`_, which completes the function.
+Once the function is built, we call ``verifyFunction``, which is
+provided by LLVM. This function does a variety of consistency checks on
+the generated code, to determine if our compiler is doing everything
+right. Using this is important: it can catch a lot of bugs. Once the
+function is finished and validated, we return it.
+
+.. code-block:: c++
+
+      // Error reading body, remove function.
+      TheFunction->eraseFromParent();
+      return 0;
+    }
+
+The only piece left here is handling of the error case. For simplicity,
+we handle this by merely deleting the function we produced with the
+``eraseFromParent`` method. This allows the user to redefine a function
+that they incorrectly typed in before: if we didn't delete it, it would
+live in the symbol table, with a body, preventing future redefinition.
+
+This code does have a bug, though. Since the ``PrototypeAST::Codegen``
+can return a previously defined forward declaration, our code can
+actually delete a forward declaration. There are a number of ways to fix
+this bug, see what you can come up with! Here is a testcase:
+
+::
+
+    extern foo(a b);     # ok, defines foo.
+    def foo(a b) c;      # error, 'c' is invalid.
+    def bar() foo(1, 2); # error, unknown function "foo"
+
+Driver Changes and Closing Thoughts
+===================================
+
+For now, code generation to LLVM doesn't really get us much, except that
+we can look at the pretty IR calls. The sample code inserts calls to
+Codegen into the "``HandleDefinition``", "``HandleExtern``" etc
+functions, and then dumps out the LLVM IR. This gives a nice way to look
+at the LLVM IR for simple functions. For example:
+
+::
+
+    ready> 4+5;
+    Read top-level expression:
+    define double @0() {
+    entry:
+      ret double 9.000000e+00
+    }
+
+Note how the parser turns the top-level expression into anonymous
+functions for us. This will be handy when we add `JIT
+support <LangImpl4.html#jit>`_ in the next chapter. Also note that the
+code is very literally transcribed, no optimizations are being performed
+except simple constant folding done by IRBuilder. We will `add
+optimizations <LangImpl4.html#trivialconstfold>`_ explicitly in the next
+chapter.
+
+::
+
+    ready> def foo(a b) a*a + 2*a*b + b*b;
+    Read function definition:
+    define double @foo(double %a, double %b) {
+    entry:
+      %multmp = fmul double %a, %a
+      %multmp1 = fmul double 2.000000e+00, %a
+      %multmp2 = fmul double %multmp1, %b
+      %addtmp = fadd double %multmp, %multmp2
+      %multmp3 = fmul double %b, %b
+      %addtmp4 = fadd double %addtmp, %multmp3
+      ret double %addtmp4
+    }
+
+This shows some simple arithmetic. Notice the striking similarity to the
+LLVM builder calls that we use to create the instructions.
+
+::
+
+    ready> def bar(a) foo(a, 4.0) + bar(31337);
+    Read function definition:
+    define double @bar(double %a) {
+    entry:
+      %calltmp = call double @foo(double %a, double 4.000000e+00)
+      %calltmp1 = call double @bar(double 3.133700e+04)
+      %addtmp = fadd double %calltmp, %calltmp1
+      ret double %addtmp
+    }
+
+This shows some function calls. Note that this function will take a long
+time to execute if you call it. In the future we'll add conditional
+control flow to actually make recursion useful :).
+
+::
+
+    ready> extern cos(x);
+    Read extern:
+    declare double @cos(double)
+
+    ready> cos(1.234);
+    Read top-level expression:
+    define double @1() {
+    entry:
+      %calltmp = call double @cos(double 1.234000e+00)
+      ret double %calltmp
+    }
+
+This shows an extern for the libm "cos" function, and a call to it.
+
+.. TODO:: Abandon Pygments' horrible `llvm` lexer. It just totally gives up
+   on highlighting this due to the first line.
+
+::
+
+    ready> ^D
+    ; ModuleID = 'my cool jit'
+
+    define double @0() {
+    entry:
+      %addtmp = fadd double 4.000000e+00, 5.000000e+00
+      ret double %addtmp
+    }
+
+    define double @foo(double %a, double %b) {
+    entry:
+      %multmp = fmul double %a, %a
+      %multmp1 = fmul double 2.000000e+00, %a
+      %multmp2 = fmul double %multmp1, %b
+      %addtmp = fadd double %multmp, %multmp2
+      %multmp3 = fmul double %b, %b
+      %addtmp4 = fadd double %addtmp, %multmp3
+      ret double %addtmp4
+    }
+
+    define double @bar(double %a) {
+    entry:
+      %calltmp = call double @foo(double %a, double 4.000000e+00)
+      %calltmp1 = call double @bar(double 3.133700e+04)
+      %addtmp = fadd double %calltmp, %calltmp1
+      ret double %addtmp
+    }
+
+    declare double @cos(double)
+
+    define double @1() {
+    entry:
+      %calltmp = call double @cos(double 1.234000e+00)
+      ret double %calltmp
+    }
+
+When you quit the current demo, it dumps out the IR for the entire
+module generated. Here you can see the big picture with all the
+functions referencing each other.
+
+This wraps up the third chapter of the Kaleidoscope tutorial. Up next,
+we'll describe how to `add JIT codegen and optimizer
+support <LangImpl4.html>`_ to this so we can actually start running
+code!
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the LLVM code generator. Because this uses the LLVM libraries, we need
+to link them in. To do this, we use the
+`llvm-config <http://llvm.org/cmds/llvm-config.html>`_ tool to inform
+our makefile/command line about which options to use:
+
+.. code-block:: bash
+
+    # Compile
+    clang++ -g -O3 toy.cpp `llvm-config --cppflags --ldflags --libs core` -o toy
+    # Run
+    ./toy
+
+Here is the code:
+
+.. code-block:: c++
+
+    // To build this:
+    // See example below.
+
+    #include "llvm/DerivedTypes.h"
+    #include "llvm/IRBuilder.h"
+    #include "llvm/LLVMContext.h"
+    #include "llvm/Module.h"
+    #include "llvm/Analysis/Verifier.h"
+    #include <cstdio>
+    #include <string>
+    #include <map>
+    #include <vector>
+    using namespace llvm;
+
+    //===----------------------------------------------------------------------===//
+    // Lexer
+    //===----------------------------------------------------------------------===//
+
+    // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+    // of these for known things.
+    enum Token {
+      tok_eof = -1,
+
+      // commands
+      tok_def = -2, tok_extern = -3,
+
+      // primary
+      tok_identifier = -4, tok_number = -5
+    };
+
+    static std::string IdentifierStr;  // Filled in if tok_identifier
+    static double NumVal;              // Filled in if tok_number
+
+    /// gettok - Return the next token from standard input.
+    static int gettok() {
+      static int LastChar = ' ';
+
+      // Skip any whitespace.
+      while (isspace(LastChar))
+        LastChar = getchar();
+
+      if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+        IdentifierStr = LastChar;
+        while (isalnum((LastChar = getchar())))
+          IdentifierStr += LastChar;
+
+        if (IdentifierStr == "def") return tok_def;
+        if (IdentifierStr == "extern") return tok_extern;
+        return tok_identifier;
+      }
+
+      if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+        std::string NumStr;
+        do {
+          NumStr += LastChar;
+          LastChar = getchar();
+        } while (isdigit(LastChar) || LastChar == '.');
+
+        NumVal = strtod(NumStr.c_str(), 0);
+        return tok_number;
+      }
+
+      if (LastChar == '#') {
+        // Comment until end of line.
+        do LastChar = getchar();
+        while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+        if (LastChar != EOF)
+          return gettok();
+      }
+
+      // Check for end of file.  Don't eat the EOF.
+      if (LastChar == EOF)
+        return tok_eof;
+
+      // Otherwise, just return the character as its ascii value.
+      int ThisChar = LastChar;
+      LastChar = getchar();
+      return ThisChar;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Abstract Syntax Tree (aka Parse Tree)
+    //===----------------------------------------------------------------------===//
+
+    /// ExprAST - Base class for all expression nodes.
+    class ExprAST {
+    public:
+      virtual ~ExprAST() {}
+      virtual Value *Codegen() = 0;
+    };
+
+    /// NumberExprAST - Expression class for numeric literals like "1.0".
+    class NumberExprAST : public ExprAST {
+      double Val;
+    public:
+      NumberExprAST(double val) : Val(val) {}
+      virtual Value *Codegen();
+    };
+
+    /// VariableExprAST - Expression class for referencing a variable, like "a".
+    class VariableExprAST : public ExprAST {
+      std::string Name;
+    public:
+      VariableExprAST(const std::string &name) : Name(name) {}
+      virtual Value *Codegen();
+    };
+
+    /// BinaryExprAST - Expression class for a binary operator.
+    class BinaryExprAST : public ExprAST {
+      char Op;
+      ExprAST *LHS, *RHS;
+    public:
+      BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+        : Op(op), LHS(lhs), RHS(rhs) {}
+      virtual Value *Codegen();
+    };
+
+    /// CallExprAST - Expression class for function calls.
+    class CallExprAST : public ExprAST {
+      std::string Callee;
+      std::vector<ExprAST*> Args;
+    public:
+      CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+        : Callee(callee), Args(args) {}
+      virtual Value *Codegen();
+    };
+
+    /// PrototypeAST - This class represents the "prototype" for a function,
+    /// which captures its name, and its argument names (thus implicitly the number
+    /// of arguments the function takes).
+    class PrototypeAST {
+      std::string Name;
+      std::vector<std::string> Args;
+    public:
+      PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+        : Name(name), Args(args) {}
+
+      Function *Codegen();
+    };
+
+    /// FunctionAST - This class represents a function definition itself.
+    class FunctionAST {
+      PrototypeAST *Proto;
+      ExprAST *Body;
+    public:
+      FunctionAST(PrototypeAST *proto, ExprAST *body)
+        : Proto(proto), Body(body) {}
+
+      Function *Codegen();
+    };
+
+    //===----------------------------------------------------------------------===//
+    // Parser
+    //===----------------------------------------------------------------------===//
+
+    /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+    /// token the parser is looking at.  getNextToken reads another token from the
+    /// lexer and updates CurTok with its results.
+    static int CurTok;
+    static int getNextToken() {
+      return CurTok = gettok();
+    }
+
+    /// BinopPrecedence - This holds the precedence for each binary operator that is
+    /// defined.
+    static std::map<char, int> BinopPrecedence;
+
+    /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+    static int GetTokPrecedence() {
+      if (!isascii(CurTok))
+        return -1;
+
+      // Make sure it's a declared binop.
+      int TokPrec = BinopPrecedence[CurTok];
+      if (TokPrec <= 0) return -1;
+      return TokPrec;
+    }
+
+    /// Error* - These are little helper functions for error handling.
+    ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+    PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+    FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+    static ExprAST *ParseExpression();
+
+    /// identifierexpr
+    ///   ::= identifier
+    ///   ::= identifier '(' expression* ')'
+    static ExprAST *ParseIdentifierExpr() {
+      std::string IdName = IdentifierStr;
+
+      getNextToken();  // eat identifier.
+
+      if (CurTok != '(') // Simple variable ref.
+        return new VariableExprAST(IdName);
+
+      // Call.
+      getNextToken();  // eat (
+      std::vector<ExprAST*> Args;
+      if (CurTok != ')') {
+        while (1) {
+          ExprAST *Arg = ParseExpression();
+          if (!Arg) return 0;
+          Args.push_back(Arg);
+
+          if (CurTok == ')') break;
+
+          if (CurTok != ',')
+            return Error("Expected ')' or ',' in argument list");
+          getNextToken();
+        }
+      }
+
+      // Eat the ')'.
+      getNextToken();
+
+      return new CallExprAST(IdName, Args);
+    }
+
+    /// numberexpr ::= number
+    static ExprAST *ParseNumberExpr() {
+      ExprAST *Result = new NumberExprAST(NumVal);
+      getNextToken(); // consume the number
+      return Result;
+    }
+
+    /// parenexpr ::= '(' expression ')'
+    static ExprAST *ParseParenExpr() {
+      getNextToken();  // eat (.
+      ExprAST *V = ParseExpression();
+      if (!V) return 0;
+
+      if (CurTok != ')')
+        return Error("expected ')'");
+      getNextToken();  // eat ).
+      return V;
+    }
+
+    /// primary
+    ///   ::= identifierexpr
+    ///   ::= numberexpr
+    ///   ::= parenexpr
+    static ExprAST *ParsePrimary() {
+      switch (CurTok) {
+      default: return Error("unknown token when expecting an expression");
+      case tok_identifier: return ParseIdentifierExpr();
+      case tok_number:     return ParseNumberExpr();
+      case '(':            return ParseParenExpr();
+      }
+    }
+
+    /// binoprhs
+    ///   ::= ('+' primary)*
+    static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+      // If this is a binop, find its precedence.
+      while (1) {
+        int TokPrec = GetTokPrecedence();
+
+        // If this is a binop that binds at least as tightly as the current binop,
+        // consume it, otherwise we are done.
+        if (TokPrec < ExprPrec)
+          return LHS;
+
+        // Okay, we know this is a binop.
+        int BinOp = CurTok;
+        getNextToken();  // eat binop
+
+        // Parse the primary expression after the binary operator.
+        ExprAST *RHS = ParsePrimary();
+        if (!RHS) return 0;
+
+        // If BinOp binds less tightly with RHS than the operator after RHS, let
+        // the pending operator take RHS as its LHS.
+        int NextPrec = GetTokPrecedence();
+        if (TokPrec < NextPrec) {
+          RHS = ParseBinOpRHS(TokPrec+1, RHS);
+          if (RHS == 0) return 0;
+        }
+
+        // Merge LHS/RHS.
+        LHS = new BinaryExprAST(BinOp, LHS, RHS);
+      }
+    }
+
+    /// expression
+    ///   ::= primary binoprhs
+    ///
+    static ExprAST *ParseExpression() {
+      ExprAST *LHS = ParsePrimary();
+      if (!LHS) return 0;
+
+      return ParseBinOpRHS(0, LHS);
+    }
+
+    /// prototype
+    ///   ::= id '(' id* ')'
+    static PrototypeAST *ParsePrototype() {
+      if (CurTok != tok_identifier)
+        return ErrorP("Expected function name in prototype");
+
+      std::string FnName = IdentifierStr;
+      getNextToken();
+
+      if (CurTok != '(')
+        return ErrorP("Expected '(' in prototype");
+
+      std::vector<std::string> ArgNames;
+      while (getNextToken() == tok_identifier)
+        ArgNames.push_back(IdentifierStr);
+      if (CurTok != ')')
+        return ErrorP("Expected ')' in prototype");
+
+      // success.
+      getNextToken();  // eat ')'.
+
+      return new PrototypeAST(FnName, ArgNames);
+    }
+
+    /// definition ::= 'def' prototype expression
+    static FunctionAST *ParseDefinition() {
+      getNextToken();  // eat def.
+      PrototypeAST *Proto = ParsePrototype();
+      if (Proto == 0) return 0;
+
+      if (ExprAST *E = ParseExpression())
+        return new FunctionAST(Proto, E);
+      return 0;
+    }
+
+    /// toplevelexpr ::= expression
+    static FunctionAST *ParseTopLevelExpr() {
+      if (ExprAST *E = ParseExpression()) {
+        // Make an anonymous proto.
+        PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+        return new FunctionAST(Proto, E);
+      }
+      return 0;
+    }
+
+    /// external ::= 'extern' prototype
+    static PrototypeAST *ParseExtern() {
+      getNextToken();  // eat extern.
+      return ParsePrototype();
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Code Generation
+    //===----------------------------------------------------------------------===//
+
+    static Module *TheModule;
+    static IRBuilder<> Builder(getGlobalContext());
+    static std::map<std::string, Value*> NamedValues;
+
+    Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+    Value *NumberExprAST::Codegen() {
+      return ConstantFP::get(getGlobalContext(), APFloat(Val));
+    }
+
+    Value *VariableExprAST::Codegen() {
+      // Look this variable up in the function.
+      Value *V = NamedValues[Name];
+      return V ? V : ErrorV("Unknown variable name");
+    }
+
+    Value *BinaryExprAST::Codegen() {
+      Value *L = LHS->Codegen();
+      Value *R = RHS->Codegen();
+      if (L == 0 || R == 0) return 0;
+
+      switch (Op) {
+      case '+': return Builder.CreateFAdd(L, R, "addtmp");
+      case '-': return Builder.CreateFSub(L, R, "subtmp");
+      case '*': return Builder.CreateFMul(L, R, "multmp");
+      case '<':
+        L = Builder.CreateFCmpULT(L, R, "cmptmp");
+        // Convert bool 0/1 to double 0.0 or 1.0
+        return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                    "booltmp");
+      default: return ErrorV("invalid binary operator");
+      }
+    }
+
+    Value *CallExprAST::Codegen() {
+      // Look up the name in the global module table.
+      Function *CalleeF = TheModule->getFunction(Callee);
+      if (CalleeF == 0)
+        return ErrorV("Unknown function referenced");
+
+      // If argument mismatch error.
+      if (CalleeF->arg_size() != Args.size())
+        return ErrorV("Incorrect # arguments passed");
+
+      std::vector<Value*> ArgsV;
+      for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+        ArgsV.push_back(Args[i]->Codegen());
+        if (ArgsV.back() == 0) return 0;
+      }
+
+      return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+    }
+
+    Function *PrototypeAST::Codegen() {
+      // Make the function type:  double(double,double) etc.
+      std::vector<Type*> Doubles(Args.size(),
+                                 Type::getDoubleTy(getGlobalContext()));
+      FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                           Doubles, false);
+
+      Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+      // If F conflicted, there was already something named 'Name'.  If it has a
+      // body, don't allow redefinition or reextern.
+      if (F->getName() != Name) {
+        // Delete the one we just made and get the existing one.
+        F->eraseFromParent();
+        F = TheModule->getFunction(Name);
+
+        // If F already has a body, reject this.
+        if (!F->empty()) {
+          ErrorF("redefinition of function");
+          return 0;
+        }
+
+        // If F took a different number of args, reject.
+        if (F->arg_size() != Args.size()) {
+          ErrorF("redefinition of function with different # args");
+          return 0;
+        }
+      }
+
+      // Set names for all arguments.
+      unsigned Idx = 0;
+      for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+           ++AI, ++Idx) {
+        AI->setName(Args[Idx]);
+
+        // Add arguments to variable symbol table.
+        NamedValues[Args[Idx]] = AI;
+      }
+
+      return F;
+    }
+
+    Function *FunctionAST::Codegen() {
+      NamedValues.clear();
+
+      Function *TheFunction = Proto->Codegen();
+      if (TheFunction == 0)
+        return 0;
+
+      // Create a new basic block to start insertion into.
+      BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+      Builder.SetInsertPoint(BB);
+
+      if (Value *RetVal = Body->Codegen()) {
+        // Finish off the function.
+        Builder.CreateRet(RetVal);
+
+        // Validate the generated code, checking for consistency.
+        verifyFunction(*TheFunction);
+
+        return TheFunction;
+      }
+
+      // Error reading body, remove function.
+      TheFunction->eraseFromParent();
+      return 0;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Top-Level parsing and JIT Driver
+    //===----------------------------------------------------------------------===//
+
+    static void HandleDefinition() {
+      if (FunctionAST *F = ParseDefinition()) {
+        if (Function *LF = F->Codegen()) {
+          fprintf(stderr, "Read function definition:");
+          LF->dump();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleExtern() {
+      if (PrototypeAST *P = ParseExtern()) {
+        if (Function *F = P->Codegen()) {
+          fprintf(stderr, "Read extern: ");
+          F->dump();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleTopLevelExpression() {
+      // Evaluate a top-level expression into an anonymous function.
+      if (FunctionAST *F = ParseTopLevelExpr()) {
+        if (Function *LF = F->Codegen()) {
+          fprintf(stderr, "Read top-level expression:");
+          LF->dump();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    /// top ::= definition | external | expression | ';'
+    static void MainLoop() {
+      while (1) {
+        fprintf(stderr, "ready> ");
+        switch (CurTok) {
+        case tok_eof:    return;
+        case ';':        getNextToken(); break;  // ignore top-level semicolons.
+        case tok_def:    HandleDefinition(); break;
+        case tok_extern: HandleExtern(); break;
+        default:         HandleTopLevelExpression(); break;
+        }
+      }
+    }
+
+    //===----------------------------------------------------------------------===//
+    // "Library" functions that can be "extern'd" from user code.
+    //===----------------------------------------------------------------------===//
+
+    /// putchard - putchar that takes a double and returns 0.
+    extern "C"
+    double putchard(double X) {
+      putchar((char)X);
+      return 0;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Main driver code.
+    //===----------------------------------------------------------------------===//
+
+    int main() {
+      LLVMContext &Context = getGlobalContext();
+
+      // Install standard binary operators.
+      // 1 is lowest precedence.
+      BinopPrecedence['<'] = 10;
+      BinopPrecedence['+'] = 20;
+      BinopPrecedence['-'] = 20;
+      BinopPrecedence['*'] = 40;  // highest.
+
+      // Prime the first token.
+      fprintf(stderr, "ready> ");
+      getNextToken();
+
+      // Make the module, which holds all the code.
+      TheModule = new Module("my cool jit", Context);
+
+      // Run the main "interpreter loop" now.
+      MainLoop();
+
+      // Print out all of the generated code.
+      TheModule->dump();
+
+      return 0;
+    }
+
+`Next: Adding JIT and Optimizer Support <LangImpl4.html>`_
+
diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html
deleted file mode 100644
index 53695924b2..0000000000
--- a/docs/tutorial/LangImpl4.html
+++ /dev/null
@@ -1,1152 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Adding JIT and Optimizer Support</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Adding JIT and Optimizer Support</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 4
-  <ol>
-    <li><a href="#intro">Chapter 4 Introduction</a></li>
-    <li><a href="#trivialconstfold">Trivial Constant Folding</a></li>
-    <li><a href="#optimizerpasses">LLVM Optimization Passes</a></li>
-    <li><a href="#jit">Adding a JIT Compiler</a></li>
-    <li><a href="#code">Full Code Listing</a></li>
-  </ol>
-</li>
-<li><a href="LangImpl5.html">Chapter 5</a>: Extending the Language: Control 
-Flow</li>
-</ul>
-
-<div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 4 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 4 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial.  Chapters 1-3 described the implementation of a simple
-language and added support for generating LLVM IR.  This chapter describes
-two new techniques: adding optimizer support to your language, and adding JIT
-compiler support.  These additions will demonstrate how to get nice, efficient code 
-for the Kaleidoscope language.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="trivialconstfold">Trivial Constant Folding</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Our demonstration for Chapter 3 is elegant and easy to extend.  Unfortunately,
-it does not produce wonderful code.  The IRBuilder, however, does give us
-obvious optimizations when compiling simple code:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) 1+2+x;</b>
-Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double 3.000000e+00, %x
-        ret double %addtmp
-}
-</pre>
-</div>
-
-<p>This code is not a literal transcription of the AST built by parsing the 
-input. That would be:
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) 1+2+x;</b>
-Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double 2.000000e+00, 1.000000e+00
-        %addtmp1 = fadd double %addtmp, %x
-        ret double %addtmp1
-}
-</pre>
-</div>
-
-<p>Constant folding, as seen above, in particular, is a very common and very
-important optimization: so much so that many language implementors implement
-constant folding support in their AST representation.</p>
-
-<p>With LLVM, you don't need this support in the AST.  Since all calls to build 
-LLVM IR go through the LLVM IR builder, the builder itself checked to see if 
-there was a constant folding opportunity when you call it.  If so, it just does 
-the constant fold and return the constant instead of creating an instruction.
-
-<p>Well, that was easy :).  In practice, we recommend always using
-<tt>IRBuilder</tt> when generating code like this.  It has no
-"syntactic overhead" for its use (you don't have to uglify your compiler with
-constant checks everywhere) and it can dramatically reduce the amount of
-LLVM IR that is generated in some cases (particular for languages with a macro
-preprocessor or that use a lot of constants).</p>
-
-<p>On the other hand, the <tt>IRBuilder</tt> is limited by the fact
-that it does all of its analysis inline with the code as it is built.  If you
-take a slightly more complex example:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) (1+2+x)*(x+(1+2));</b>
-ready> Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double 3.000000e+00, %x
-        %addtmp1 = fadd double %x, 3.000000e+00
-        %multmp = fmul double %addtmp, %addtmp1
-        ret double %multmp
-}
-</pre>
-</div>
-
-<p>In this case, the LHS and RHS of the multiplication are the same value.  We'd
-really like to see this generate "<tt>tmp = x+3; result = tmp*tmp;</tt>" instead
-of computing "<tt>x+3</tt>" twice.</p>
-
-<p>Unfortunately, no amount of local analysis will be able to detect and correct
-this.  This requires two transformations: reassociation of expressions (to 
-make the add's lexically identical) and Common Subexpression Elimination (CSE)
-to  delete the redundant add instruction.  Fortunately, LLVM provides a broad
-range of optimizations that you can use, in the form of "passes".</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="optimizerpasses">LLVM Optimization Passes</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM provides many optimization passes, which do many different sorts of
-things and have different tradeoffs.  Unlike other systems, LLVM doesn't hold
-to the mistaken notion that one set of optimizations is right for all languages
-and for all situations.  LLVM allows a compiler implementor to make complete
-decisions about what optimizations to use, in which order, and in what
-situation.</p>
-
-<p>As a concrete example, LLVM supports both "whole module" passes, which look
-across as large of body of code as they can (often a whole file, but if run 
-at link time, this can be a substantial portion of the whole program).  It also
-supports and includes "per-function" passes which just operate on a single
-function at a time, without looking at other functions.  For more information
-on passes and how they are run, see the <a href="../WritingAnLLVMPass.html">How
-to Write a Pass</a> document and the <a href="../Passes.html">List of LLVM 
-Passes</a>.</p>
-
-<p>For Kaleidoscope, we are currently generating functions on the fly, one at
-a time, as the user types them in.  We aren't shooting for the ultimate
-optimization experience in this setting, but we also want to catch the easy and
-quick stuff where possible.  As such, we will choose to run a few per-function
-optimizations as the user types the function in.  If we wanted to make a "static
-Kaleidoscope compiler", we would use exactly the code we have now, except that
-we would defer running the optimizer until the entire file has been parsed.</p>
-
-<p>In order to get per-function optimizations going, we need to set up a
-<a href="../WritingAnLLVMPass.html#passmanager">FunctionPassManager</a> to hold and
-organize the LLVM optimizations that we want to run.  Once we have that, we can
-add a set of optimizations to run.  The code looks like this:</p>
-
-<div class="doc_code">
-<pre>
-  FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &amp;OurFPM;
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-</pre>
-</div>
-
-<p>This code defines a <tt>FunctionPassManager</tt>, "<tt>OurFPM</tt>".  It
-requires a pointer to the <tt>Module</tt> to construct itself.  Once it is set
-up, we use a series of "add" calls to add a bunch of LLVM passes.  The first
-pass is basically boilerplate, it adds a pass so that later optimizations know
-how the data structures in the program are laid out.  The
-"<tt>TheExecutionEngine</tt>" variable is related to the JIT, which we will get
-to in the next section.</p>
-
-<p>In this case, we choose to add 4 optimization passes.  The passes we chose
-here are a pretty standard set of "cleanup" optimizations that are useful for
-a wide variety of code.  I won't delve into what they do but, believe me,
-they are a good starting place :).</p>
-
-<p>Once the PassManager is set up, we need to make use of it.  We do this by
-running it after our newly created function is constructed (in 
-<tt>FunctionAST::Codegen</tt>), but before it is returned to the client:</p>
-
-<div class="doc_code">
-<pre>
-  if (Value *RetVal = Body->Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    <b>// Optimize the function.
-    TheFPM-&gt;run(*TheFunction);</b>
-    
-    return TheFunction;
-  }
-</pre>
-</div>
-
-<p>As you can see, this is pretty straightforward.  The 
-<tt>FunctionPassManager</tt> optimizes and updates the LLVM Function* in place,
-improving (hopefully) its body.  With this in place, we can try our test above
-again:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) (1+2+x)*(x+(1+2));</b>
-ready> Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double %x, 3.000000e+00
-        %multmp = fmul double %addtmp, %addtmp
-        ret double %multmp
-}
-</pre>
-</div>
-
-<p>As expected, we now get our nicely optimized code, saving a floating point
-add instruction from every execution of this function.</p>
-
-<p>LLVM provides a wide variety of optimizations that can be used in certain
-circumstances.  Some <a href="../Passes.html">documentation about the various 
-passes</a> is available, but it isn't very complete.  Another good source of
-ideas can come from looking at the passes that <tt>Clang</tt> runs to get
-started.  The "<tt>opt</tt>" tool allows you to experiment with passes from the
-command line, so you can see if they do anything.</p>
-
-<p>Now that we have reasonable code coming out of our front-end, lets talk about
-executing it!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="jit">Adding a JIT Compiler</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Code that is available in LLVM IR can have a wide variety of tools 
-applied to it.  For example, you can run optimizations on it (as we did above),
-you can dump it out in textual or binary forms, you can compile the code to an
-assembly file (.s) for some target, or you can JIT compile it.  The nice thing
-about the LLVM IR representation is that it is the "common currency" between
-many different parts of the compiler.
-</p>
-
-<p>In this section, we'll add JIT compiler support to our interpreter.  The
-basic idea that we want for Kaleidoscope is to have the user enter function
-bodies as they do now, but immediately evaluate the top-level expressions they
-type in.  For example, if they type in "1 + 2;", we should evaluate and print
-out 3.  If they define a function, they should be able to call it from the 
-command line.</p>
-
-<p>In order to do this, we first declare and initialize the JIT.  This is done
-by adding a global variable and a call in <tt>main</tt>:</p>
-
-<div class="doc_code">
-<pre>
-<b>static ExecutionEngine *TheExecutionEngine;</b>
-...
-int main() {
-  ..
-  <b>// Create the JIT.  This takes ownership of the module.
-  TheExecutionEngine = EngineBuilder(TheModule).create();</b>
-  ..
-}
-</pre>
-</div>
-
-<p>This creates an abstract "Execution Engine" which can be either a JIT
-compiler or the LLVM interpreter.  LLVM will automatically pick a JIT compiler
-for you if one is available for your platform, otherwise it will fall back to
-the interpreter.</p>
-
-<p>Once the <tt>ExecutionEngine</tt> is created, the JIT is ready to be used.
-There are a variety of APIs that are useful, but the simplest one is the
-"<tt>getPointerToFunction(F)</tt>" method.  This method JIT compiles the
-specified LLVM Function and returns a function pointer to the generated machine
-code.  In our case, this means that we can change the code that parses a
-top-level expression to look like this:</p>
-
-<div class="doc_code">
-<pre>
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F-&gt;Codegen()) {
-      LF->dump();  // Dump the function for exposition purposes.
-    
-      <b>// JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine-&gt;getPointerToFunction(LF);
-      
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
-      fprintf(stderr, "Evaluated to %f\n", FP());</b>
-    }
-</pre>
-</div>
-
-<p>Recall that we compile top-level expressions into a self-contained LLVM
-function that takes no arguments and returns the computed double.  Because the 
-LLVM JIT compiler matches the native platform ABI, this means that you can just
-cast the result pointer to a function pointer of that type and call it directly.
-This means, there is no difference between JIT compiled code and native machine
-code that is statically linked into your application.</p>
-
-<p>With just these two changes, lets see how Kaleidoscope works now!</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>4+5;</b>
-Read top-level expression:
-define double @0() {
-entry:
-  ret double 9.000000e+00
-}
-
-<em>Evaluated to 9.000000</em>
-</pre>
-</div>
-
-<p>Well this looks like it is basically working.  The dump of the function
-shows the "no argument function that always returns double" that we synthesize
-for each top-level expression that is typed in.  This demonstrates very basic
-functionality, but can we do more?</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def testfunc(x y) x + y*2; </b> 
-Read function definition:
-define double @testfunc(double %x, double %y) {
-entry:
-  %multmp = fmul double %y, 2.000000e+00
-  %addtmp = fadd double %multmp, %x
-  ret double %addtmp
-}
-
-ready&gt; <b>testfunc(4, 10);</b>
-Read top-level expression:
-define double @1() {
-entry:
-  %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
-  ret double %calltmp
-}
-
-<em>Evaluated to 24.000000</em>
-</pre>
-</div>
-
-<p>This illustrates that we can now call user code, but there is something a bit
-subtle going on here.  Note that we only invoke the JIT on the anonymous
-functions that <em>call testfunc</em>, but we never invoked it
-on <em>testfunc</em> itself.  What actually happened here is that the JIT
-scanned for all non-JIT'd functions transitively called from the anonymous
-function and compiled all of them before returning
-from <tt>getPointerToFunction()</tt>.</p>
-
-<p>The JIT provides a number of other more advanced interfaces for things like
-freeing allocated machine code, rejit'ing functions to update them, etc.
-However, even with this simple code, we get some surprisingly powerful
-capabilities - check this out (I removed the dump of the anonymous functions,
-you should get the idea by now :) :</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>extern sin(x);</b>
-Read extern: 
-declare double @sin(double)
-
-ready&gt; <b>extern cos(x);</b>
-Read extern: 
-declare double @cos(double)
-
-ready&gt; <b>sin(1.0);</b>
-Read top-level expression:
-define double @2() {
-entry:
-  ret double 0x3FEAED548F090CEE
-}
-
-<em>Evaluated to 0.841471</em>
-
-ready&gt; <b>def foo(x) sin(x)*sin(x) + cos(x)*cos(x);</b>
-Read function definition:
-define double @foo(double %x) {
-entry:
-  %calltmp = call double @sin(double %x)
-  %multmp = fmul double %calltmp, %calltmp
-  %calltmp2 = call double @cos(double %x)
-  %multmp4 = fmul double %calltmp2, %calltmp2
-  %addtmp = fadd double %multmp, %multmp4
-  ret double %addtmp
-}
-
-ready&gt; <b>foo(4.0);</b>
-Read top-level expression:
-define double @3() {
-entry:
-  %calltmp = call double @foo(double 4.000000e+00)
-  ret double %calltmp
-}
-
-<em>Evaluated to 1.000000</em>
-</pre>
-</div>
-
-<p>Whoa, how does the JIT know about sin and cos?  The answer is surprisingly
-simple: in this
-example, the JIT started execution of a function and got to a function call.  It
-realized that the function was not yet JIT compiled and invoked the standard set
-of routines to resolve the function.  In this case, there is no body defined
-for the function, so the JIT ended up calling "<tt>dlsym("sin")</tt>" on the
-Kaleidoscope process itself.
-Since "<tt>sin</tt>" is defined within the JIT's address space, it simply
-patches up calls in the module to call the libm version of <tt>sin</tt>
-directly.</p>
-
-<p>The LLVM JIT provides a number of interfaces (look in the 
-<tt>ExecutionEngine.h</tt> file) for controlling how unknown functions get
-resolved.  It allows you to establish explicit mappings between IR objects and
-addresses (useful for LLVM global variables that you want to map to static
-tables, for example), allows you to dynamically decide on the fly based on the
-function name, and even allows you to have the JIT compile functions lazily the
-first time they're called.</p>
-
-<p>One interesting application of this is that we can now extend the language
-by writing arbitrary C++ code to implement operations.  For example, if we add:
-</p>
-
-<div class="doc_code">
-<pre>
-/// putchard - putchar that takes a double and returns 0.
-extern "C" 
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-</pre>
-</div>
-
-<p>Now we can produce simple output to the console by using things like:
-"<tt>extern putchard(x); putchard(120);</tt>", which prints a lowercase 'x' on
-the console (120 is the ASCII code for 'x').  Similar code could be used to 
-implement file I/O, console input, and many other capabilities in
-Kaleidoscope.</p>
-
-<p>This completes the JIT and optimizer chapter of the Kaleidoscope tutorial. At
-this point, we can compile a non-Turing-complete programming language, optimize
-and JIT compile it in a user-driven way.  Next up we'll look into <a 
-href="LangImpl5.html">extending the language with control flow constructs</a>,
-tackling some interesting LLVM IR issues along the way.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-LLVM JIT and optimizer.  To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
-# Run
-./toy
-</pre>
-</div>
-
-<p>
-If you are compiling this on Linux, make sure to add the "-rdynamic" option 
-as well.  This makes sure that the external functions are resolved properly 
-at runtime.</p>
-
-<p>Here is the code:</p>
-
-<div class="doc_code">
-<pre>
-#include "llvm/DerivedTypes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/TargetSelect.h"
-#include &lt;cstdio&gt;
-#include &lt;string&gt;
-#include &lt;map&gt;
-#include &lt;vector&gt;
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    return tok_identifier;
-  }
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-  
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-  virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &amp;name) : Name(name) {}
-  virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-  virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector&lt;ExprAST*&gt; Args;
-public:
-  CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
-    : Callee(callee), Args(args) {}
-  virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
-  std::string Name;
-  std::vector&lt;std::string&gt; Args;
-public:
-  PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args)
-    : Name(name), Args(args) {}
-  
-  Function *Codegen();
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-  
-  Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-  
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec &lt;= 0) return -1;
-  return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector&lt;ExprAST*&gt; Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  }
-}
-
-/// binoprhs
-///   ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec &lt; ExprPrec)
-      return LHS;
-    
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the primary expression after the binary operator.
-    ExprAST *RHS = ParsePrimary();
-    if (!RHS) return 0;
-    
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec &lt; NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }
-}
-
-/// expression
-///   ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParsePrimary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-///   ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
-  if (CurTok != tok_identifier)
-    return ErrorP("Expected function name in prototype");
-
-  std::string FnName = IdentifierStr;
-  getNextToken();
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  std::vector&lt;std::string&gt; ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  return new PrototypeAST(FnName, ArgNames);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder&lt;&gt; Builder(getGlobalContext());
-static std::map&lt;std::string, Value*&gt; NamedValues;
-static FunctionPassManager *TheFPM;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-Value *NumberExprAST::Codegen() {
-  return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
-}
-
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS-&gt;Codegen();
-  Value *R = RHS-&gt;Codegen();
-  if (L == 0 || R == 0) return 0;
-  
-  switch (Op) {
-  case '+': return Builder.CreateFAdd(L, R, "addtmp");
-  case '-': return Builder.CreateFSub(L, R, "subtmp");
-  case '*': return Builder.CreateFMul(L, R, "multmp");
-  case '&lt;':
-    L = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  default: return ErrorV("invalid binary operator");
-  }
-}
-
-Value *CallExprAST::Codegen() {
-  // Look up the name in the global module table.
-  Function *CalleeF = TheModule-&gt;getFunction(Callee);
-  if (CalleeF == 0)
-    return ErrorV("Unknown function referenced");
-  
-  // If argument mismatch error.
-  if (CalleeF-&gt;arg_size() != Args.size())
-    return ErrorV("Incorrect # arguments passed");
-
-  std::vector&lt;Value*&gt; ArgsV;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]-&gt;Codegen());
-    if (ArgsV.back() == 0) return 0;
-  }
-  
-  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Function *PrototypeAST::Codegen() {
-  // Make the function type:  double(double,double) etc.
-  std::vector&lt;Type*&gt; Doubles(Args.size(),
-                             Type::getDoubleTy(getGlobalContext()));
-  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
-                                       Doubles, false);
-  
-  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-  
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F-&gt;getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F-&gt;eraseFromParent();
-    F = TheModule-&gt;getFunction(Name);
-    
-    // If F already has a body, reject this.
-    if (!F-&gt;empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-    
-    // If F took a different number of args, reject.
-    if (F-&gt;arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
-  
-  // Set names for all arguments.
-  unsigned Idx = 0;
-  for (Function::arg_iterator AI = F-&gt;arg_begin(); Idx != Args.size();
-       ++AI, ++Idx) {
-    AI-&gt;setName(Args[Idx]);
-    
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = AI;
-  }
-  
-  return F;
-}
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto-&gt;Codegen();
-  if (TheFunction == 0)
-    return 0;
-  
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  if (Value *RetVal = Body-&gt;Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    // Optimize the function.
-    TheFPM-&gt;run(*TheFunction);
-    
-    return TheFunction;
-  }
-  
-  // Error reading body, remove function.
-  TheFunction-&gt;eraseFromParent();
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static ExecutionEngine *TheExecutionEngine;
-
-static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F-&gt;Codegen()) {
-      fprintf(stderr, "Read function definition:");
-      LF-&gt;dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P-&gt;Codegen()) {
-      fprintf(stderr, "Read extern: ");
-      F-&gt;dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F-&gt;Codegen()) {
-      fprintf(stderr, "Read top-level expression:");
-      LF->dump();
-
-      // JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine-&gt;getPointerToFunction(LF);
-      
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
-      fprintf(stderr, "Evaluated to %f\n", FP());
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready&gt; ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C" 
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
-  InitializeNativeTarget();
-  LLVMContext &amp;Context = getGlobalContext();
-
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['&lt;'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-
-  // Prime the first token.
-  fprintf(stderr, "ready&gt; ");
-  getNextToken();
-
-  // Make the module, which holds all the code.
-  TheModule = new Module("my cool jit", Context);
-
-  // Create the JIT.  This takes ownership of the module.
-  std::string ErrStr;
-  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&amp;ErrStr).create();
-  if (!TheExecutionEngine) {
-    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
-    exit(1);
-  }
-
-  FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  OurFPM.add(new DataLayout(*TheExecutionEngine-&gt;getDataLayout()));
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &amp;OurFPM;
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-
-  TheFPM = 0;
-
-  // Print out all of the generated code.
-  TheModule-&gt;dump();
-
-  return 0;
-}
-</pre>
-</div>
-
-<a href="LangImpl5.html">Next: Extending the language: control flow</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl4.rst b/docs/tutorial/LangImpl4.rst
new file mode 100644
index 0000000000..8484c57f9d
--- /dev/null
+++ b/docs/tutorial/LangImpl4.rst
@@ -0,0 +1,1063 @@
+==============================================
+Kaleidoscope: Adding JIT and Optimizer Support
+==============================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_
+
+Chapter 4 Introduction
+======================
+
+Welcome to Chapter 4 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. Chapters 1-3 described the implementation
+of a simple language and added support for generating LLVM IR. This
+chapter describes two new techniques: adding optimizer support to your
+language, and adding JIT compiler support. These additions will
+demonstrate how to get nice, efficient code for the Kaleidoscope
+language.
+
+Trivial Constant Folding
+========================
+
+Our demonstration for Chapter 3 is elegant and easy to extend.
+Unfortunately, it does not produce wonderful code. The IRBuilder,
+however, does give us obvious optimizations when compiling simple code:
+
+::
+
+    ready> def test(x) 1+2+x;
+    Read function definition:
+    define double @test(double %x) {
+    entry:
+            %addtmp = fadd double 3.000000e+00, %x
+            ret double %addtmp
+    }
+
+This code is not a literal transcription of the AST built by parsing the
+input. That would be:
+
+::
+
+    ready> def test(x) 1+2+x;
+    Read function definition:
+    define double @test(double %x) {
+    entry:
+            %addtmp = fadd double 2.000000e+00, 1.000000e+00
+            %addtmp1 = fadd double %addtmp, %x
+            ret double %addtmp1
+    }
+
+Constant folding, as seen above, in particular, is a very common and
+very important optimization: so much so that many language implementors
+implement constant folding support in their AST representation.
+
+With LLVM, you don't need this support in the AST. Since all calls to
+build LLVM IR go through the LLVM IR builder, the builder itself checked
+to see if there was a constant folding opportunity when you call it. If
+so, it just does the constant fold and return the constant instead of
+creating an instruction.
+
+Well, that was easy :). In practice, we recommend always using
+``IRBuilder`` when generating code like this. It has no "syntactic
+overhead" for its use (you don't have to uglify your compiler with
+constant checks everywhere) and it can dramatically reduce the amount of
+LLVM IR that is generated in some cases (particular for languages with a
+macro preprocessor or that use a lot of constants).
+
+On the other hand, the ``IRBuilder`` is limited by the fact that it does
+all of its analysis inline with the code as it is built. If you take a
+slightly more complex example:
+
+::
+
+    ready> def test(x) (1+2+x)*(x+(1+2));
+    ready> Read function definition:
+    define double @test(double %x) {
+    entry:
+            %addtmp = fadd double 3.000000e+00, %x
+            %addtmp1 = fadd double %x, 3.000000e+00
+            %multmp = fmul double %addtmp, %addtmp1
+            ret double %multmp
+    }
+
+In this case, the LHS and RHS of the multiplication are the same value.
+We'd really like to see this generate "``tmp = x+3; result = tmp*tmp;``"
+instead of computing "``x+3``" twice.
+
+Unfortunately, no amount of local analysis will be able to detect and
+correct this. This requires two transformations: reassociation of
+expressions (to make the add's lexically identical) and Common
+Subexpression Elimination (CSE) to delete the redundant add instruction.
+Fortunately, LLVM provides a broad range of optimizations that you can
+use, in the form of "passes".
+
+LLVM Optimization Passes
+========================
+
+LLVM provides many optimization passes, which do many different sorts of
+things and have different tradeoffs. Unlike other systems, LLVM doesn't
+hold to the mistaken notion that one set of optimizations is right for
+all languages and for all situations. LLVM allows a compiler implementor
+to make complete decisions about what optimizations to use, in which
+order, and in what situation.
+
+As a concrete example, LLVM supports both "whole module" passes, which
+look across as large of body of code as they can (often a whole file,
+but if run at link time, this can be a substantial portion of the whole
+program). It also supports and includes "per-function" passes which just
+operate on a single function at a time, without looking at other
+functions. For more information on passes and how they are run, see the
+`How to Write a Pass <../WritingAnLLVMPass.html>`_ document and the
+`List of LLVM Passes <../Passes.html>`_.
+
+For Kaleidoscope, we are currently generating functions on the fly, one
+at a time, as the user types them in. We aren't shooting for the
+ultimate optimization experience in this setting, but we also want to
+catch the easy and quick stuff where possible. As such, we will choose
+to run a few per-function optimizations as the user types the function
+in. If we wanted to make a "static Kaleidoscope compiler", we would use
+exactly the code we have now, except that we would defer running the
+optimizer until the entire file has been parsed.
+
+In order to get per-function optimizations going, we need to set up a
+`FunctionPassManager <../WritingAnLLVMPass.html#passmanager>`_ to hold
+and organize the LLVM optimizations that we want to run. Once we have
+that, we can add a set of optimizations to run. The code looks like
+this:
+
+.. code-block:: c++
+
+      FunctionPassManager OurFPM(TheModule);
+
+      // Set up the optimizer pipeline.  Start with registering info about how the
+      // target lays out data structures.
+      OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
+      // Provide basic AliasAnalysis support for GVN.
+      OurFPM.add(createBasicAliasAnalysisPass());
+      // Do simple "peephole" optimizations and bit-twiddling optzns.
+      OurFPM.add(createInstructionCombiningPass());
+      // Reassociate expressions.
+      OurFPM.add(createReassociatePass());
+      // Eliminate Common SubExpressions.
+      OurFPM.add(createGVNPass());
+      // Simplify the control flow graph (deleting unreachable blocks, etc).
+      OurFPM.add(createCFGSimplificationPass());
+
+      OurFPM.doInitialization();
+
+      // Set the global so the code gen can use this.
+      TheFPM = &OurFPM;
+
+      // Run the main "interpreter loop" now.
+      MainLoop();
+
+This code defines a ``FunctionPassManager``, "``OurFPM``". It requires a
+pointer to the ``Module`` to construct itself. Once it is set up, we use
+a series of "add" calls to add a bunch of LLVM passes. The first pass is
+basically boilerplate, it adds a pass so that later optimizations know
+how the data structures in the program are laid out. The
+"``TheExecutionEngine``" variable is related to the JIT, which we will
+get to in the next section.
+
+In this case, we choose to add 4 optimization passes. The passes we
+chose here are a pretty standard set of "cleanup" optimizations that are
+useful for a wide variety of code. I won't delve into what they do but,
+believe me, they are a good starting place :).
+
+Once the PassManager is set up, we need to make use of it. We do this by
+running it after our newly created function is constructed (in
+``FunctionAST::Codegen``), but before it is returned to the client:
+
+.. code-block:: c++
+
+      if (Value *RetVal = Body->Codegen()) {
+        // Finish off the function.
+        Builder.CreateRet(RetVal);
+
+        // Validate the generated code, checking for consistency.
+        verifyFunction(*TheFunction);
+
+        // Optimize the function.
+        TheFPM->run(*TheFunction);
+
+        return TheFunction;
+      }
+
+As you can see, this is pretty straightforward. The
+``FunctionPassManager`` optimizes and updates the LLVM Function\* in
+place, improving (hopefully) its body. With this in place, we can try
+our test above again:
+
+::
+
+    ready> def test(x) (1+2+x)*(x+(1+2));
+    ready> Read function definition:
+    define double @test(double %x) {
+    entry:
+            %addtmp = fadd double %x, 3.000000e+00
+            %multmp = fmul double %addtmp, %addtmp
+            ret double %multmp
+    }
+
+As expected, we now get our nicely optimized code, saving a floating
+point add instruction from every execution of this function.
+
+LLVM provides a wide variety of optimizations that can be used in
+certain circumstances. Some `documentation about the various
+passes <../Passes.html>`_ is available, but it isn't very complete.
+Another good source of ideas can come from looking at the passes that
+``Clang`` runs to get started. The "``opt``" tool allows you to
+experiment with passes from the command line, so you can see if they do
+anything.
+
+Now that we have reasonable code coming out of our front-end, lets talk
+about executing it!
+
+Adding a JIT Compiler
+=====================
+
+Code that is available in LLVM IR can have a wide variety of tools
+applied to it. For example, you can run optimizations on it (as we did
+above), you can dump it out in textual or binary forms, you can compile
+the code to an assembly file (.s) for some target, or you can JIT
+compile it. The nice thing about the LLVM IR representation is that it
+is the "common currency" between many different parts of the compiler.
+
+In this section, we'll add JIT compiler support to our interpreter. The
+basic idea that we want for Kaleidoscope is to have the user enter
+function bodies as they do now, but immediately evaluate the top-level
+expressions they type in. For example, if they type in "1 + 2;", we
+should evaluate and print out 3. If they define a function, they should
+be able to call it from the command line.
+
+In order to do this, we first declare and initialize the JIT. This is
+done by adding a global variable and a call in ``main``:
+
+.. code-block:: c++
+
+    static ExecutionEngine *TheExecutionEngine;
+    ...
+    int main() {
+      ..
+      // Create the JIT.  This takes ownership of the module.
+      TheExecutionEngine = EngineBuilder(TheModule).create();
+      ..
+    }
+
+This creates an abstract "Execution Engine" which can be either a JIT
+compiler or the LLVM interpreter. LLVM will automatically pick a JIT
+compiler for you if one is available for your platform, otherwise it
+will fall back to the interpreter.
+
+Once the ``ExecutionEngine`` is created, the JIT is ready to be used.
+There are a variety of APIs that are useful, but the simplest one is the
+"``getPointerToFunction(F)``" method. This method JIT compiles the
+specified LLVM Function and returns a function pointer to the generated
+machine code. In our case, this means that we can change the code that
+parses a top-level expression to look like this:
+
+.. code-block:: c++
+
+    static void HandleTopLevelExpression() {
+      // Evaluate a top-level expression into an anonymous function.
+      if (FunctionAST *F = ParseTopLevelExpr()) {
+        if (Function *LF = F->Codegen()) {
+          LF->dump();  // Dump the function for exposition purposes.
+
+          // JIT the function, returning a function pointer.
+          void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+          // Cast it to the right type (takes no arguments, returns a double) so we
+          // can call it as a native function.
+          double (*FP)() = (double (*)())(intptr_t)FPtr;
+          fprintf(stderr, "Evaluated to %f\n", FP());
+        }
+
+Recall that we compile top-level expressions into a self-contained LLVM
+function that takes no arguments and returns the computed double.
+Because the LLVM JIT compiler matches the native platform ABI, this
+means that you can just cast the result pointer to a function pointer of
+that type and call it directly. This means, there is no difference
+between JIT compiled code and native machine code that is statically
+linked into your application.
+
+With just these two changes, lets see how Kaleidoscope works now!
+
+::
+
+    ready> 4+5;
+    Read top-level expression:
+    define double @0() {
+    entry:
+      ret double 9.000000e+00
+    }
+
+    Evaluated to 9.000000
+
+Well this looks like it is basically working. The dump of the function
+shows the "no argument function that always returns double" that we
+synthesize for each top-level expression that is typed in. This
+demonstrates very basic functionality, but can we do more?
+
+::
+
+    ready> def testfunc(x y) x + y*2;
+    Read function definition:
+    define double @testfunc(double %x, double %y) {
+    entry:
+      %multmp = fmul double %y, 2.000000e+00
+      %addtmp = fadd double %multmp, %x
+      ret double %addtmp
+    }
+
+    ready> testfunc(4, 10);
+    Read top-level expression:
+    define double @1() {
+    entry:
+      %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
+      ret double %calltmp
+    }
+
+    Evaluated to 24.000000
+
+This illustrates that we can now call user code, but there is something
+a bit subtle going on here. Note that we only invoke the JIT on the
+anonymous functions that *call testfunc*, but we never invoked it on
+*testfunc* itself. What actually happened here is that the JIT scanned
+for all non-JIT'd functions transitively called from the anonymous
+function and compiled all of them before returning from
+``getPointerToFunction()``.
+
+The JIT provides a number of other more advanced interfaces for things
+like freeing allocated machine code, rejit'ing functions to update them,
+etc. However, even with this simple code, we get some surprisingly
+powerful capabilities - check this out (I removed the dump of the
+anonymous functions, you should get the idea by now :) :
+
+::
+
+    ready> extern sin(x);
+    Read extern:
+    declare double @sin(double)
+
+    ready> extern cos(x);
+    Read extern:
+    declare double @cos(double)
+
+    ready> sin(1.0);
+    Read top-level expression:
+    define double @2() {
+    entry:
+      ret double 0x3FEAED548F090CEE
+    }
+
+    Evaluated to 0.841471
+
+    ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x);
+    Read function definition:
+    define double @foo(double %x) {
+    entry:
+      %calltmp = call double @sin(double %x)
+      %multmp = fmul double %calltmp, %calltmp
+      %calltmp2 = call double @cos(double %x)
+      %multmp4 = fmul double %calltmp2, %calltmp2
+      %addtmp = fadd double %multmp, %multmp4
+      ret double %addtmp
+    }
+
+    ready> foo(4.0);
+    Read top-level expression:
+    define double @3() {
+    entry:
+      %calltmp = call double @foo(double 4.000000e+00)
+      ret double %calltmp
+    }
+
+    Evaluated to 1.000000
+
+Whoa, how does the JIT know about sin and cos? The answer is
+surprisingly simple: in this example, the JIT started execution of a
+function and got to a function call. It realized that the function was
+not yet JIT compiled and invoked the standard set of routines to resolve
+the function. In this case, there is no body defined for the function,
+so the JIT ended up calling "``dlsym("sin")``" on the Kaleidoscope
+process itself. Since "``sin``" is defined within the JIT's address
+space, it simply patches up calls in the module to call the libm version
+of ``sin`` directly.
+
+The LLVM JIT provides a number of interfaces (look in the
+``ExecutionEngine.h`` file) for controlling how unknown functions get
+resolved. It allows you to establish explicit mappings between IR
+objects and addresses (useful for LLVM global variables that you want to
+map to static tables, for example), allows you to dynamically decide on
+the fly based on the function name, and even allows you to have the JIT
+compile functions lazily the first time they're called.
+
+One interesting application of this is that we can now extend the
+language by writing arbitrary C++ code to implement operations. For
+example, if we add:
+
+.. code-block:: c++
+
+    /// putchard - putchar that takes a double and returns 0.
+    extern "C"
+    double putchard(double X) {
+      putchar((char)X);
+      return 0;
+    }
+
+Now we can produce simple output to the console by using things like:
+"``extern putchard(x); putchard(120);``", which prints a lowercase 'x'
+on the console (120 is the ASCII code for 'x'). Similar code could be
+used to implement file I/O, console input, and many other capabilities
+in Kaleidoscope.
+
+This completes the JIT and optimizer chapter of the Kaleidoscope
+tutorial. At this point, we can compile a non-Turing-complete
+programming language, optimize and JIT compile it in a user-driven way.
+Next up we'll look into `extending the language with control flow
+constructs <LangImpl5.html>`_, tackling some interesting LLVM IR issues
+along the way.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the LLVM JIT and optimizer. To build this example, use:
+
+.. code-block:: bash
+
+    # Compile
+    clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+    # Run
+    ./toy
+
+If you are compiling this on Linux, make sure to add the "-rdynamic"
+option as well. This makes sure that the external functions are resolved
+properly at runtime.
+
+Here is the code:
+
+.. code-block:: c++
+
+    #include "llvm/DerivedTypes.h"
+    #include "llvm/ExecutionEngine/ExecutionEngine.h"
+    #include "llvm/ExecutionEngine/JIT.h"
+    #include "llvm/IRBuilder.h"
+    #include "llvm/LLVMContext.h"
+    #include "llvm/Module.h"
+    #include "llvm/PassManager.h"
+    #include "llvm/Analysis/Verifier.h"
+    #include "llvm/Analysis/Passes.h"
+    #include "llvm/DataLayout.h"
+    #include "llvm/Transforms/Scalar.h"
+    #include "llvm/Support/TargetSelect.h"
+    #include <cstdio>
+    #include <string>
+    #include <map>
+    #include <vector>
+    using namespace llvm;
+
+    //===----------------------------------------------------------------------===//
+    // Lexer
+    //===----------------------------------------------------------------------===//
+
+    // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+    // of these for known things.
+    enum Token {
+      tok_eof = -1,
+
+      // commands
+      tok_def = -2, tok_extern = -3,
+
+      // primary
+      tok_identifier = -4, tok_number = -5
+    };
+
+    static std::string IdentifierStr;  // Filled in if tok_identifier
+    static double NumVal;              // Filled in if tok_number
+
+    /// gettok - Return the next token from standard input.
+    static int gettok() {
+      static int LastChar = ' ';
+
+      // Skip any whitespace.
+      while (isspace(LastChar))
+        LastChar = getchar();
+
+      if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+        IdentifierStr = LastChar;
+        while (isalnum((LastChar = getchar())))
+          IdentifierStr += LastChar;
+
+        if (IdentifierStr == "def") return tok_def;
+        if (IdentifierStr == "extern") return tok_extern;
+        return tok_identifier;
+      }
+
+      if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+        std::string NumStr;
+        do {
+          NumStr += LastChar;
+          LastChar = getchar();
+        } while (isdigit(LastChar) || LastChar == '.');
+
+        NumVal = strtod(NumStr.c_str(), 0);
+        return tok_number;
+      }
+
+      if (LastChar == '#') {
+        // Comment until end of line.
+        do LastChar = getchar();
+        while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+        if (LastChar != EOF)
+          return gettok();
+      }
+
+      // Check for end of file.  Don't eat the EOF.
+      if (LastChar == EOF)
+        return tok_eof;
+
+      // Otherwise, just return the character as its ascii value.
+      int ThisChar = LastChar;
+      LastChar = getchar();
+      return ThisChar;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Abstract Syntax Tree (aka Parse Tree)
+    //===----------------------------------------------------------------------===//
+
+    /// ExprAST - Base class for all expression nodes.
+    class ExprAST {
+    public:
+      virtual ~ExprAST() {}
+      virtual Value *Codegen() = 0;
+    };
+
+    /// NumberExprAST - Expression class for numeric literals like "1.0".
+    class NumberExprAST : public ExprAST {
+      double Val;
+    public:
+      NumberExprAST(double val) : Val(val) {}
+      virtual Value *Codegen();
+    };
+
+    /// VariableExprAST - Expression class for referencing a variable, like "a".
+    class VariableExprAST : public ExprAST {
+      std::string Name;
+    public:
+      VariableExprAST(const std::string &name) : Name(name) {}
+      virtual Value *Codegen();
+    };
+
+    /// BinaryExprAST - Expression class for a binary operator.
+    class BinaryExprAST : public ExprAST {
+      char Op;
+      ExprAST *LHS, *RHS;
+    public:
+      BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+        : Op(op), LHS(lhs), RHS(rhs) {}
+      virtual Value *Codegen();
+    };
+
+    /// CallExprAST - Expression class for function calls.
+    class CallExprAST : public ExprAST {
+      std::string Callee;
+      std::vector<ExprAST*> Args;
+    public:
+      CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+        : Callee(callee), Args(args) {}
+      virtual Value *Codegen();
+    };
+
+    /// PrototypeAST - This class represents the "prototype" for a function,
+    /// which captures its name, and its argument names (thus implicitly the number
+    /// of arguments the function takes).
+    class PrototypeAST {
+      std::string Name;
+      std::vector<std::string> Args;
+    public:
+      PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+        : Name(name), Args(args) {}
+
+      Function *Codegen();
+    };
+
+    /// FunctionAST - This class represents a function definition itself.
+    class FunctionAST {
+      PrototypeAST *Proto;
+      ExprAST *Body;
+    public:
+      FunctionAST(PrototypeAST *proto, ExprAST *body)
+        : Proto(proto), Body(body) {}
+
+      Function *Codegen();
+    };
+
+    //===----------------------------------------------------------------------===//
+    // Parser
+    //===----------------------------------------------------------------------===//
+
+    /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+    /// token the parser is looking at.  getNextToken reads another token from the
+    /// lexer and updates CurTok with its results.
+    static int CurTok;
+    static int getNextToken() {
+      return CurTok = gettok();
+    }
+
+    /// BinopPrecedence - This holds the precedence for each binary operator that is
+    /// defined.
+    static std::map<char, int> BinopPrecedence;
+
+    /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+    static int GetTokPrecedence() {
+      if (!isascii(CurTok))
+        return -1;
+
+      // Make sure it's a declared binop.
+      int TokPrec = BinopPrecedence[CurTok];
+      if (TokPrec <= 0) return -1;
+      return TokPrec;
+    }
+
+    /// Error* - These are little helper functions for error handling.
+    ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+    PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+    FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+    static ExprAST *ParseExpression();
+
+    /// identifierexpr
+    ///   ::= identifier
+    ///   ::= identifier '(' expression* ')'
+    static ExprAST *ParseIdentifierExpr() {
+      std::string IdName = IdentifierStr;
+
+      getNextToken();  // eat identifier.
+
+      if (CurTok != '(') // Simple variable ref.
+        return new VariableExprAST(IdName);
+
+      // Call.
+      getNextToken();  // eat (
+      std::vector<ExprAST*> Args;
+      if (CurTok != ')') {
+        while (1) {
+          ExprAST *Arg = ParseExpression();
+          if (!Arg) return 0;
+          Args.push_back(Arg);
+
+          if (CurTok == ')') break;
+
+          if (CurTok != ',')
+            return Error("Expected ')' or ',' in argument list");
+          getNextToken();
+        }
+      }
+
+      // Eat the ')'.
+      getNextToken();
+
+      return new CallExprAST(IdName, Args);
+    }
+
+    /// numberexpr ::= number
+    static ExprAST *ParseNumberExpr() {
+      ExprAST *Result = new NumberExprAST(NumVal);
+      getNextToken(); // consume the number
+      return Result;
+    }
+
+    /// parenexpr ::= '(' expression ')'
+    static ExprAST *ParseParenExpr() {
+      getNextToken();  // eat (.
+      ExprAST *V = ParseExpression();
+      if (!V) return 0;
+
+      if (CurTok != ')')
+        return Error("expected ')'");
+      getNextToken();  // eat ).
+      return V;
+    }
+
+    /// primary
+    ///   ::= identifierexpr
+    ///   ::= numberexpr
+    ///   ::= parenexpr
+    static ExprAST *ParsePrimary() {
+      switch (CurTok) {
+      default: return Error("unknown token when expecting an expression");
+      case tok_identifier: return ParseIdentifierExpr();
+      case tok_number:     return ParseNumberExpr();
+      case '(':            return ParseParenExpr();
+      }
+    }
+
+    /// binoprhs
+    ///   ::= ('+' primary)*
+    static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+      // If this is a binop, find its precedence.
+      while (1) {
+        int TokPrec = GetTokPrecedence();
+
+        // If this is a binop that binds at least as tightly as the current binop,
+        // consume it, otherwise we are done.
+        if (TokPrec < ExprPrec)
+          return LHS;
+
+        // Okay, we know this is a binop.
+        int BinOp = CurTok;
+        getNextToken();  // eat binop
+
+        // Parse the primary expression after the binary operator.
+        ExprAST *RHS = ParsePrimary();
+        if (!RHS) return 0;
+
+        // If BinOp binds less tightly with RHS than the operator after RHS, let
+        // the pending operator take RHS as its LHS.
+        int NextPrec = GetTokPrecedence();
+        if (TokPrec < NextPrec) {
+          RHS = ParseBinOpRHS(TokPrec+1, RHS);
+          if (RHS == 0) return 0;
+        }
+
+        // Merge LHS/RHS.
+        LHS = new BinaryExprAST(BinOp, LHS, RHS);
+      }
+    }
+
+    /// expression
+    ///   ::= primary binoprhs
+    ///
+    static ExprAST *ParseExpression() {
+      ExprAST *LHS = ParsePrimary();
+      if (!LHS) return 0;
+
+      return ParseBinOpRHS(0, LHS);
+    }
+
+    /// prototype
+    ///   ::= id '(' id* ')'
+    static PrototypeAST *ParsePrototype() {
+      if (CurTok != tok_identifier)
+        return ErrorP("Expected function name in prototype");
+
+      std::string FnName = IdentifierStr;
+      getNextToken();
+
+      if (CurTok != '(')
+        return ErrorP("Expected '(' in prototype");
+
+      std::vector<std::string> ArgNames;
+      while (getNextToken() == tok_identifier)
+        ArgNames.push_back(IdentifierStr);
+      if (CurTok != ')')
+        return ErrorP("Expected ')' in prototype");
+
+      // success.
+      getNextToken();  // eat ')'.
+
+      return new PrototypeAST(FnName, ArgNames);
+    }
+
+    /// definition ::= 'def' prototype expression
+    static FunctionAST *ParseDefinition() {
+      getNextToken();  // eat def.
+      PrototypeAST *Proto = ParsePrototype();
+      if (Proto == 0) return 0;
+
+      if (ExprAST *E = ParseExpression())
+        return new FunctionAST(Proto, E);
+      return 0;
+    }
+
+    /// toplevelexpr ::= expression
+    static FunctionAST *ParseTopLevelExpr() {
+      if (ExprAST *E = ParseExpression()) {
+        // Make an anonymous proto.
+        PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+        return new FunctionAST(Proto, E);
+      }
+      return 0;
+    }
+
+    /// external ::= 'extern' prototype
+    static PrototypeAST *ParseExtern() {
+      getNextToken();  // eat extern.
+      return ParsePrototype();
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Code Generation
+    //===----------------------------------------------------------------------===//
+
+    static Module *TheModule;
+    static IRBuilder<> Builder(getGlobalContext());
+    static std::map<std::string, Value*> NamedValues;
+    static FunctionPassManager *TheFPM;
+
+    Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+    Value *NumberExprAST::Codegen() {
+      return ConstantFP::get(getGlobalContext(), APFloat(Val));
+    }
+
+    Value *VariableExprAST::Codegen() {
+      // Look this variable up in the function.
+      Value *V = NamedValues[Name];
+      return V ? V : ErrorV("Unknown variable name");
+    }
+
+    Value *BinaryExprAST::Codegen() {
+      Value *L = LHS->Codegen();
+      Value *R = RHS->Codegen();
+      if (L == 0 || R == 0) return 0;
+
+      switch (Op) {
+      case '+': return Builder.CreateFAdd(L, R, "addtmp");
+      case '-': return Builder.CreateFSub(L, R, "subtmp");
+      case '*': return Builder.CreateFMul(L, R, "multmp");
+      case '<':
+        L = Builder.CreateFCmpULT(L, R, "cmptmp");
+        // Convert bool 0/1 to double 0.0 or 1.0
+        return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                    "booltmp");
+      default: return ErrorV("invalid binary operator");
+      }
+    }
+
+    Value *CallExprAST::Codegen() {
+      // Look up the name in the global module table.
+      Function *CalleeF = TheModule->getFunction(Callee);
+      if (CalleeF == 0)
+        return ErrorV("Unknown function referenced");
+
+      // If argument mismatch error.
+      if (CalleeF->arg_size() != Args.size())
+        return ErrorV("Incorrect # arguments passed");
+
+      std::vector<Value*> ArgsV;
+      for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+        ArgsV.push_back(Args[i]->Codegen());
+        if (ArgsV.back() == 0) return 0;
+      }
+
+      return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+    }
+
+    Function *PrototypeAST::Codegen() {
+      // Make the function type:  double(double,double) etc.
+      std::vector<Type*> Doubles(Args.size(),
+                                 Type::getDoubleTy(getGlobalContext()));
+      FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                           Doubles, false);
+
+      Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+      // If F conflicted, there was already something named 'Name'.  If it has a
+      // body, don't allow redefinition or reextern.
+      if (F->getName() != Name) {
+        // Delete the one we just made and get the existing one.
+        F->eraseFromParent();
+        F = TheModule->getFunction(Name);
+
+        // If F already has a body, reject this.
+        if (!F->empty()) {
+          ErrorF("redefinition of function");
+          return 0;
+        }
+
+        // If F took a different number of args, reject.
+        if (F->arg_size() != Args.size()) {
+          ErrorF("redefinition of function with different # args");
+          return 0;
+        }
+      }
+
+      // Set names for all arguments.
+      unsigned Idx = 0;
+      for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+           ++AI, ++Idx) {
+        AI->setName(Args[Idx]);
+
+        // Add arguments to variable symbol table.
+        NamedValues[Args[Idx]] = AI;
+      }
+
+      return F;
+    }
+
+    Function *FunctionAST::Codegen() {
+      NamedValues.clear();
+
+      Function *TheFunction = Proto->Codegen();
+      if (TheFunction == 0)
+        return 0;
+
+      // Create a new basic block to start insertion into.
+      BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+      Builder.SetInsertPoint(BB);
+
+      if (Value *RetVal = Body->Codegen()) {
+        // Finish off the function.
+        Builder.CreateRet(RetVal);
+
+        // Validate the generated code, checking for consistency.
+        verifyFunction(*TheFunction);
+
+        // Optimize the function.
+        TheFPM->run(*TheFunction);
+
+        return TheFunction;
+      }
+
+      // Error reading body, remove function.
+      TheFunction->eraseFromParent();
+      return 0;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Top-Level parsing and JIT Driver
+    //===----------------------------------------------------------------------===//
+
+    static ExecutionEngine *TheExecutionEngine;
+
+    static void HandleDefinition() {
+      if (FunctionAST *F = ParseDefinition()) {
+        if (Function *LF = F->Codegen()) {
+          fprintf(stderr, "Read function definition:");
+          LF->dump();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleExtern() {
+      if (PrototypeAST *P = ParseExtern()) {
+        if (Function *F = P->Codegen()) {
+          fprintf(stderr, "Read extern: ");
+          F->dump();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleTopLevelExpression() {
+      // Evaluate a top-level expression into an anonymous function.
+      if (FunctionAST *F = ParseTopLevelExpr()) {
+        if (Function *LF = F->Codegen()) {
+          fprintf(stderr, "Read top-level expression:");
+          LF->dump();
+
+          // JIT the function, returning a function pointer.
+          void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+          // Cast it to the right type (takes no arguments, returns a double) so we
+          // can call it as a native function.
+          double (*FP)() = (double (*)())(intptr_t)FPtr;
+          fprintf(stderr, "Evaluated to %f\n", FP());
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    /// top ::= definition | external | expression | ';'
+    static void MainLoop() {
+      while (1) {
+        fprintf(stderr, "ready> ");
+        switch (CurTok) {
+        case tok_eof:    return;
+        case ';':        getNextToken(); break;  // ignore top-level semicolons.
+        case tok_def:    HandleDefinition(); break;
+        case tok_extern: HandleExtern(); break;
+        default:         HandleTopLevelExpression(); break;
+        }
+      }
+    }
+
+    //===----------------------------------------------------------------------===//
+    // "Library" functions that can be "extern'd" from user code.
+    //===----------------------------------------------------------------------===//
+
+    /// putchard - putchar that takes a double and returns 0.
+    extern "C"
+    double putchard(double X) {
+      putchar((char)X);
+      return 0;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Main driver code.
+    //===----------------------------------------------------------------------===//
+
+    int main() {
+      InitializeNativeTarget();
+      LLVMContext &Context = getGlobalContext();
+
+      // Install standard binary operators.
+      // 1 is lowest precedence.
+      BinopPrecedence['<'] = 10;
+      BinopPrecedence['+'] = 20;
+      BinopPrecedence['-'] = 20;
+      BinopPrecedence['*'] = 40;  // highest.
+
+      // Prime the first token.
+      fprintf(stderr, "ready> ");
+      getNextToken();
+
+      // Make the module, which holds all the code.
+      TheModule = new Module("my cool jit", Context);
+
+      // Create the JIT.  This takes ownership of the module.
+      std::string ErrStr;
+      TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+      if (!TheExecutionEngine) {
+        fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+        exit(1);
+      }
+
+      FunctionPassManager OurFPM(TheModule);
+
+      // Set up the optimizer pipeline.  Start with registering info about how the
+      // target lays out data structures.
+      OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
+      // Provide basic AliasAnalysis support for GVN.
+      OurFPM.add(createBasicAliasAnalysisPass());
+      // Do simple "peephole" optimizations and bit-twiddling optzns.
+      OurFPM.add(createInstructionCombiningPass());
+      // Reassociate expressions.
+      OurFPM.add(createReassociatePass());
+      // Eliminate Common SubExpressions.
+      OurFPM.add(createGVNPass());
+      // Simplify the control flow graph (deleting unreachable blocks, etc).
+      OurFPM.add(createCFGSimplificationPass());
+
+      OurFPM.doInitialization();
+
+      // Set the global so the code gen can use this.
+      TheFPM = &OurFPM;
+
+      // Run the main "interpreter loop" now.
+      MainLoop();
+
+      TheFPM = 0;
+
+      // Print out all of the generated code.
+      TheModule->dump();
+
+      return 0;
+    }
+
+`Next: Extending the language: control flow <LangImpl5.html>`_
+
diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html
deleted file mode 100644
index 768d9a0e11..0000000000
--- a/docs/tutorial/LangImpl5.html
+++ /dev/null
@@ -1,1772 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Extending the Language: Control Flow</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Extending the Language: Control Flow</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 5
-  <ol>
-    <li><a href="#intro">Chapter 5 Introduction</a></li>
-    <li><a href="#ifthen">If/Then/Else</a>
-    <ol>
-      <li><a href="#iflexer">Lexer Extensions</a></li>
-      <li><a href="#ifast">AST Extensions</a></li>
-      <li><a href="#ifparser">Parser Extensions</a></li>
-      <li><a href="#ifir">LLVM IR</a></li>
-      <li><a href="#ifcodegen">Code Generation</a></li>
-    </ol>
-    </li>
-    <li><a href="#for">'for' Loop Expression</a>
-    <ol>
-      <li><a href="#forlexer">Lexer Extensions</a></li>
-      <li><a href="#forast">AST Extensions</a></li>
-      <li><a href="#forparser">Parser Extensions</a></li>
-      <li><a href="#forir">LLVM IR</a></li>
-      <li><a href="#forcodegen">Code Generation</a></li>
-    </ol>
-    </li>
-    <li><a href="#code">Full Code Listing</a></li>
-  </ol>
-</li>
-<li><a href="LangImpl6.html">Chapter 6</a>: Extending the Language: 
-User-defined Operators</li>
-</ul>
-
-<div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 5 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 5 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial.  Parts 1-4 described the implementation of the simple
-Kaleidoscope language and included support for generating LLVM IR, followed by
-optimizations and a JIT compiler.  Unfortunately, as presented, Kaleidoscope is
-mostly useless: it has no control flow other than call and return.  This means
-that you can't have conditional branches in the code, significantly limiting its
-power.  In this episode of "build that compiler", we'll extend Kaleidoscope to
-have an if/then/else expression plus a simple 'for' loop.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="ifthen">If/Then/Else</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Extending Kaleidoscope to support if/then/else is quite straightforward.  It
-basically requires adding support for this "new" concept to the lexer,
-parser, AST, and LLVM code emitter.  This example is nice, because it shows how
-easy it is to "grow" a language over time, incrementally extending it as new
-ideas are discovered.</p>
-
-<p>Before we get going on "how" we add this extension, lets talk about "what" we
-want.  The basic idea is that we want to be able to write this sort of thing:
-</p>
-
-<div class="doc_code">
-<pre>
-def fib(x)
-  if x &lt; 3 then
-    1
-  else
-    fib(x-1)+fib(x-2);
-</pre>
-</div>
-
-<p>In Kaleidoscope, every construct is an expression: there are no statements.
-As such, the if/then/else expression needs to return a value like any other.
-Since we're using a mostly functional form, we'll have it evaluate its
-conditional, then return the 'then' or 'else' value based on how the condition
-was resolved.  This is very similar to the C "?:" expression.</p>
-
-<p>The semantics of the if/then/else expression is that it evaluates the
-condition to a boolean equality value: 0.0 is considered to be false and
-everything else is considered to be true.
-If the condition is true, the first subexpression is evaluated and returned, if
-the condition is false, the second subexpression is evaluated and returned.
-Since Kaleidoscope allows side-effects, this behavior is important to nail down.
-</p>
-
-<p>Now that we know what we "want", lets break this down into its constituent
-pieces.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="iflexer">Lexer Extensions for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-
-<div>
-
-<p>The lexer extensions are straightforward.  First we add new enum values
-for the relevant tokens:</p>
-
-<div class="doc_code">
-<pre>
-  // control
-  tok_if = -6, tok_then = -7, tok_else = -8,
-</pre>
-</div>
-
-<p>Once we have that, we recognize the new keywords in the lexer. This is pretty simple
-stuff:</p>
-
-<div class="doc_code">
-<pre>
-    ...
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    <b>if (IdentifierStr == "if") return tok_if;
-    if (IdentifierStr == "then") return tok_then;
-    if (IdentifierStr == "else") return tok_else;</b>
-    return tok_identifier;
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifast">AST Extensions for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>To represent the new expression we add a new AST node for it:</p>
-
-<div class="doc_code">
-<pre>
-/// IfExprAST - Expression class for if/then/else.
-class IfExprAST : public ExprAST {
-  ExprAST *Cond, *Then, *Else;
-public:
-  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
-    : Cond(cond), Then(then), Else(_else) {}
-  virtual Value *Codegen();
-};
-</pre>
-</div>
-
-<p>The AST node just has pointers to the various subexpressions.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifparser">Parser Extensions for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Now that we have the relevant tokens coming from the lexer and we have the
-AST node to build, our parsing logic is relatively straightforward.  First we
-define a new parsing function:</p>
-
-<div class="doc_code">
-<pre>
-/// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {
-  getNextToken();  // eat the if.
-  
-  // condition.
-  ExprAST *Cond = ParseExpression();
-  if (!Cond) return 0;
-  
-  if (CurTok != tok_then)
-    return Error("expected then");
-  getNextToken();  // eat the then
-  
-  ExprAST *Then = ParseExpression();
-  if (Then == 0) return 0;
-  
-  if (CurTok != tok_else)
-    return Error("expected else");
-  
-  getNextToken();
-  
-  ExprAST *Else = ParseExpression();
-  if (!Else) return 0;
-  
-  return new IfExprAST(Cond, Then, Else);
-}
-</pre>
-</div>
-
-<p>Next we hook it up as a primary expression:</p>
-
-<div class="doc_code">
-<pre>
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  <b>case tok_if:         return ParseIfExpr();</b>
-  }
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifir">LLVM IR for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Now that we have it parsing and building the AST, the final piece is adding
-LLVM code generation support.  This is the most interesting part of the
-if/then/else example, because this is where it starts to introduce new concepts.
-All of the code above has been thoroughly described in previous chapters.
-</p>
-
-<p>To motivate the code we want to produce, lets take a look at a simple
-example.  Consider:</p>
-
-<div class="doc_code">
-<pre>
-extern foo();
-extern bar();
-def baz(x) if x then foo() else bar();
-</pre>
-</div>
-
-<p>If you disable optimizations, the code you'll (soon) get from Kaleidoscope
-looks like this:</p>
-
-<div class="doc_code">
-<pre>
-declare double @foo()
-
-declare double @bar()
-
-define double @baz(double %x) {
-entry:
-  %ifcond = fcmp one double %x, 0.000000e+00
-  br i1 %ifcond, label %then, label %else
-
-then:		; preds = %entry
-  %calltmp = call double @foo()
-  br label %ifcont
-
-else:		; preds = %entry
-  %calltmp1 = call double @bar()
-  br label %ifcont
-
-ifcont:		; preds = %else, %then
-  %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
-  ret double %iftmp
-}
-</pre>
-</div>
-
-<p>To visualize the control flow graph, you can use a nifty feature of the LLVM
-'<a href="http://llvm.org/cmds/opt.html">opt</a>' tool.  If you put this LLVM IR
-into "t.ll" and run "<tt>llvm-as &lt; t.ll | opt -analyze -view-cfg</tt>", <a
-href="../ProgrammersManual.html#ViewGraph">a window will pop up</a> and you'll
-see this graph:</p>
-
-<div style="text-align: center"><img src="LangImpl5-cfg.png" alt="Example CFG" width="423" 
-height="315"></div>
-
-<p>Another way to get this is to call "<tt>F-&gt;viewCFG()</tt>" or
-"<tt>F-&gt;viewCFGOnly()</tt>" (where F is a "<tt>Function*</tt>") either by
-inserting actual calls into the code and recompiling or by calling these in the
-debugger.  LLVM has many nice features for visualizing various graphs.</p>
-
-<p>Getting back to the generated code, it is fairly simple: the entry block 
-evaluates the conditional expression ("x" in our case here) and compares the
-result to 0.0 with the "<tt><a href="../LangRef.html#i_fcmp">fcmp</a> one</tt>"
-instruction ('one' is "Ordered and Not Equal").  Based on the result of this
-expression, the code jumps to either the "then" or "else" blocks, which contain
-the expressions for the true/false cases.</p>
-
-<p>Once the then/else blocks are finished executing, they both branch back to the
-'ifcont' block to execute the code that happens after the if/then/else.  In this
-case the only thing left to do is to return to the caller of the function.  The
-question then becomes: how does the code know which expression to return?</p>
-
-<p>The answer to this question involves an important SSA operation: the
-<a href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Phi
-operation</a>.  If you're not familiar with SSA, <a 
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">the wikipedia
-article</a> is a good introduction and there are various other introductions to
-it available on your favorite search engine.  The short version is that
-"execution" of the Phi operation requires "remembering" which block control came
-from.  The Phi operation takes on the value corresponding to the input control
-block.  In this case, if control comes in from the "then" block, it gets the
-value of "calltmp".  If control comes from the "else" block, it gets the value
-of "calltmp1".</p>
-
-<p>At this point, you are probably starting to think "Oh no! This means my
-simple and elegant front-end will have to start generating SSA form in order to
-use LLVM!".  Fortunately, this is not the case, and we strongly advise
-<em>not</em> implementing an SSA construction algorithm in your front-end
-unless there is an amazingly good reason to do so.  In practice, there are two
-sorts of values that float around in code written for your average imperative
-programming language that might need Phi nodes:</p>
-
-<ol>
-<li>Code that involves user variables: <tt>x = 1; x = x + 1; </tt></li>
-<li>Values that are implicit in the structure of your AST, such as the Phi node
-in this case.</li>
-</ol>
-
-<p>In <a href="LangImpl7.html">Chapter 7</a> of this tutorial ("mutable 
-variables"), we'll talk about #1
-in depth.  For now, just believe me that you don't need SSA construction to
-handle this case.  For #2, you have the choice of using the techniques that we will 
-describe for #1, or you can insert Phi nodes directly, if convenient.  In this 
-case, it is really really easy to generate the Phi node, so we choose to do it
-directly.</p>
-
-<p>Okay, enough of the motivation and overview, lets generate code!</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifcodegen">Code Generation for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>In order to generate code for this, we implement the <tt>Codegen</tt> method
-for <tt>IfExprAST</tt>:</p>
-
-<div class="doc_code">
-<pre>
-Value *IfExprAST::Codegen() {
-  Value *CondV = Cond-&gt;Codegen();
-  if (CondV == 0) return 0;
-  
-  // Convert condition to a bool by comparing equal to 0.0.
-  CondV = Builder.CreateFCmpONE(CondV, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                "ifcond");
-</pre>
-</div>
-
-<p>This code is straightforward and similar to what we saw before.  We emit the
-expression for the condition, then compare that value to zero to get a truth
-value as a 1-bit (bool) value.</p>
-
-<div class="doc_code">
-<pre>
-  Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-  
-  // Create blocks for the then and else cases.  Insert the 'then' block at the
-  // end of the function.
-  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
-  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
-  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
-
-  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
-</pre>
-</div>
-
-<p>This code creates the basic blocks that are related to the if/then/else
-statement, and correspond directly to the blocks in the example above.  The
-first line gets the current Function object that is being built.  It
-gets this by asking the builder for the current BasicBlock, and asking that
-block for its "parent" (the function it is currently embedded into).</p>
-
-<p>Once it has that, it creates three blocks.  Note that it passes "TheFunction"
-into the constructor for the "then" block.  This causes the constructor to
-automatically insert the new block into the end of the specified function.  The
-other two blocks are created, but aren't yet inserted into the function.</p>
-
-<p>Once the blocks are created, we can emit the conditional branch that chooses
-between them.  Note that creating new blocks does not implicitly affect the
-IRBuilder, so it is still inserting into the block that the condition
-went into.  Also note that it is creating a branch to the "then" block and the
-"else" block, even though the "else" block isn't inserted into the function yet.
-This is all ok: it is the standard way that LLVM supports forward 
-references.</p>
-
-<div class="doc_code">
-<pre>
-  // Emit then value.
-  Builder.SetInsertPoint(ThenBB);
-  
-  Value *ThenV = Then-&gt;Codegen();
-  if (ThenV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
-  ThenBB = Builder.GetInsertBlock();
-</pre>
-</div>
-
-<p>After the conditional branch is inserted, we move the builder to start
-inserting into the "then" block.  Strictly speaking, this call moves the
-insertion point to be at the end of the specified block.  However, since the
-"then" block is empty, it also starts out by inserting at the beginning of the
-block.  :)</p>
-
-<p>Once the insertion point is set, we recursively codegen the "then" expression
-from the AST.  To finish off the "then" block, we create an unconditional branch
-to the merge block.  One interesting (and very important) aspect of the LLVM IR
-is that it <a href="../LangRef.html#functionstructure">requires all basic blocks
-to be "terminated"</a> with a <a href="../LangRef.html#terminators">control flow
-instruction</a> such as return or branch.  This means that all control flow,
-<em>including fall throughs</em> must be made explicit in the LLVM IR.  If you
-violate this rule, the verifier will emit an error.</p>
-
-<p>The final line here is quite subtle, but is very important.  The basic issue
-is that when we create the Phi node in the merge block, we need to set up the
-block/value pairs that indicate how the Phi will work.  Importantly, the Phi
-node expects to have an entry for each predecessor of the block in the CFG.  Why
-then, are we getting the current block when we just set it to ThenBB 5 lines
-above?  The problem is that the "Then" expression may actually itself change the
-block that the Builder is emitting into if, for example, it contains a nested
-"if/then/else" expression.  Because calling Codegen recursively could
-arbitrarily change the notion of the current block, we are required to get an
-up-to-date value for code that will set up the Phi node.</p>
-
-<div class="doc_code">
-<pre>
-  // Emit else block.
-  TheFunction-&gt;getBasicBlockList().push_back(ElseBB);
-  Builder.SetInsertPoint(ElseBB);
-  
-  Value *ElseV = Else-&gt;Codegen();
-  if (ElseV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
-  ElseBB = Builder.GetInsertBlock();
-</pre>
-</div>
-
-<p>Code generation for the 'else' block is basically identical to codegen for
-the 'then' block.  The only significant difference is the first line, which adds
-the 'else' block to the function.  Recall previously that the 'else' block was
-created, but not added to the function.  Now that the 'then' and 'else' blocks
-are emitted, we can finish up with the merge code:</p>
-
-<div class="doc_code">
-<pre>
-  // Emit merge block.
-  TheFunction->getBasicBlockList().push_back(MergeBB);
-  Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
-                                  "iftmp");
-  
-  PN->addIncoming(ThenV, ThenBB);
-  PN->addIncoming(ElseV, ElseBB);
-  return PN;
-}
-</pre>
-</div>
-
-<p>The first two lines here are now familiar: the first adds the "merge" block
-to the Function object (it was previously floating, like the else block above).
-The second block changes the insertion point so that newly created code will go
-into the "merge" block.  Once that is done, we need to create the PHI node and
-set up the block/value pairs for the PHI.</p>
-
-<p>Finally, the CodeGen function returns the phi node as the value computed by
-the if/then/else expression.  In our example above, this returned value will 
-feed into the code for the top-level function, which will create the return
-instruction.</p>
-
-<p>Overall, we now have the ability to execute conditional code in
-Kaleidoscope.  With this extension, Kaleidoscope is a fairly complete language
-that can calculate a wide variety of numeric functions.  Next up we'll add
-another useful expression that is familiar from non-functional languages...</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="for">'for' Loop Expression</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we know how to add basic control flow constructs to the language,
-we have the tools to add more powerful things.  Lets add something more
-aggressive, a 'for' expression:</p>
-
-<div class="doc_code">
-<pre>
- extern putchard(char)
- def printstar(n)
-   for i = 1, i &lt; n, 1.0 in
-     putchard(42);  # ascii 42 = '*'
-     
- # print 100 '*' characters
- printstar(100);
-</pre>
-</div>
-
-<p>This expression defines a new variable ("i" in this case) which iterates from
-a starting value, while the condition ("i &lt; n" in this case) is true, 
-incrementing by an optional step value ("1.0" in this case).  If the step value
-is omitted, it defaults to 1.0.  While the loop is true, it executes its 
-body expression.  Because we don't have anything better to return, we'll just
-define the loop as always returning 0.0.  In the future when we have mutable
-variables, it will get more useful.</p>
-
-<p>As before, lets talk about the changes that we need to Kaleidoscope to
-support this.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="forlexer">Lexer Extensions for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The lexer extensions are the same sort of thing as for if/then/else:</p>
-
-<div class="doc_code">
-<pre>
-  ... in enum Token ...
-  // control
-  tok_if = -6, tok_then = -7, tok_else = -8,
-<b>  tok_for = -9, tok_in = -10</b>
-
-  ... in gettok ...
-  if (IdentifierStr == "def") return tok_def;
-  if (IdentifierStr == "extern") return tok_extern;
-  if (IdentifierStr == "if") return tok_if;
-  if (IdentifierStr == "then") return tok_then;
-  if (IdentifierStr == "else") return tok_else;
-  <b>if (IdentifierStr == "for") return tok_for;
-  if (IdentifierStr == "in") return tok_in;</b>
-  return tok_identifier;
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forast">AST Extensions for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The AST node is just as simple.  It basically boils down to capturing
-the variable name and the constituent expressions in the node.</p>
-
-<div class="doc_code">
-<pre>
-/// ForExprAST - Expression class for for/in.
-class ForExprAST : public ExprAST {
-  std::string VarName;
-  ExprAST *Start, *End, *Step, *Body;
-public:
-  ForExprAST(const std::string &amp;varname, ExprAST *start, ExprAST *end,
-             ExprAST *step, ExprAST *body)
-    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
-  virtual Value *Codegen();
-};
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forparser">Parser Extensions for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The parser code is also fairly standard.  The only interesting thing here is
-handling of the optional step value.  The parser code handles it by checking to
-see if the second comma is present.  If not, it sets the step value to null in
-the AST node:</p>
-
-<div class="doc_code">
-<pre>
-/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {
-  getNextToken();  // eat the for.
-
-  if (CurTok != tok_identifier)
-    return Error("expected identifier after for");
-  
-  std::string IdName = IdentifierStr;
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '=')
-    return Error("expected '=' after for");
-  getNextToken();  // eat '='.
-  
-  
-  ExprAST *Start = ParseExpression();
-  if (Start == 0) return 0;
-  if (CurTok != ',')
-    return Error("expected ',' after for start value");
-  getNextToken();
-  
-  ExprAST *End = ParseExpression();
-  if (End == 0) return 0;
-  
-  // The step value is optional.
-  ExprAST *Step = 0;
-  if (CurTok == ',') {
-    getNextToken();
-    Step = ParseExpression();
-    if (Step == 0) return 0;
-  }
-  
-  if (CurTok != tok_in)
-    return Error("expected 'in' after for");
-  getNextToken();  // eat 'in'.
-  
-  ExprAST *Body = ParseExpression();
-  if (Body == 0) return 0;
-
-  return new ForExprAST(IdName, Start, End, Step, Body);
-}
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forir">LLVM IR for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Now we get to the good part: the LLVM IR we want to generate for this thing.
-With the simple example above, we get this LLVM IR (note that this dump is
-generated with optimizations disabled for clarity):
-</p>
-
-<div class="doc_code">
-<pre>
-declare double @putchard(double)
-
-define double @printstar(double %n) {
-entry:
-  ; initial value = 1.0 (inlined into phi)
-  br label %loop
-
-loop:		; preds = %loop, %entry
-  %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
-  ; body
-  %calltmp = call double @putchard(double 4.200000e+01)
-  ; increment
-  %nextvar = fadd double %i, 1.000000e+00
-
-  ; termination test
-  %cmptmp = fcmp ult double %i, %n
-  %booltmp = uitofp i1 %cmptmp to double
-  %loopcond = fcmp one double %booltmp, 0.000000e+00
-  br i1 %loopcond, label %loop, label %afterloop
-
-afterloop:		; preds = %loop
-  ; loop always returns 0.0
-  ret double 0.000000e+00
-}
-</pre>
-</div>
-
-<p>This loop contains all the same constructs we saw before: a phi node, several
-expressions, and some basic blocks.  Lets see how this fits together.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forcodegen">Code Generation for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The first part of Codegen is very simple: we just output the start expression
-for the loop value:</p>
-
-<div class="doc_code">
-<pre>
-Value *ForExprAST::Codegen() {
-  // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start-&gt;Codegen();
-  if (StartVal == 0) return 0;
-</pre>
-</div>
-
-<p>With this out of the way, the next step is to set up the LLVM basic block
-for the start of the loop body.  In the case above, the whole loop body is one
-block, but remember that the body code itself could consist of multiple blocks
-(e.g. if it contains an if/then/else or a for/in expression).</p>
-
-<div class="doc_code">
-<pre>
-  // Make the new basic block for the loop header, inserting after current
-  // block.
-  Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
-  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
-  
-  // Insert an explicit fall through from the current block to the LoopBB.
-  Builder.CreateBr(LoopBB);
-</pre>
-</div>
-
-<p>This code is similar to what we saw for if/then/else.  Because we will need
-it to create the Phi node, we remember the block that falls through into the
-loop.  Once we have that, we create the actual block that starts the loop and
-create an unconditional branch for the fall-through between the two blocks.</p>
-  
-<div class="doc_code">
-<pre>
-  // Start insertion in LoopBB.
-  Builder.SetInsertPoint(LoopBB);
-  
-  // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
-  Variable-&gt;addIncoming(StartVal, PreheaderBB);
-</pre>
-</div>
-
-<p>Now that the "preheader" for the loop is set up, we switch to emitting code
-for the loop body.  To begin with, we move the insertion point and create the
-PHI node for the loop induction variable.  Since we already know the incoming
-value for the starting value, we add it to the Phi node.  Note that the Phi will
-eventually get a second value for the backedge, but we can't set it up yet
-(because it doesn't exist!).</p>
-
-<div class="doc_code">
-<pre>
-  // Within the loop, the variable is defined equal to the PHI node.  If it
-  // shadows an existing variable, we have to restore it, so save it now.
-  Value *OldVal = NamedValues[VarName];
-  NamedValues[VarName] = Variable;
-  
-  // Emit the body of the loop.  This, like any other expr, can change the
-  // current BB.  Note that we ignore the value computed by the body, but don't
-  // allow an error.
-  if (Body-&gt;Codegen() == 0)
-    return 0;
-</pre>
-</div>
-
-<p>Now the code starts to get more interesting.  Our 'for' loop introduces a new
-variable to the symbol table.  This means that our symbol table can now contain
-either function arguments or loop variables.  To handle this, before we codegen
-the body of the loop, we add the loop variable as the current value for its
-name.  Note that it is possible that there is a variable of the same name in the
-outer scope.  It would be easy to make this an error (emit an error and return
-null if there is already an entry for VarName) but we choose to allow shadowing
-of variables.  In order to handle this correctly, we remember the Value that
-we are potentially shadowing in <tt>OldVal</tt> (which will be null if there is
-no shadowed variable).</p>
-
-<p>Once the loop variable is set into the symbol table, the code recursively
-codegen's the body.  This allows the body to use the loop variable: any
-references to it will naturally find it in the symbol table.</p>
-
-<div class="doc_code">
-<pre>
-  // Emit the step value.
-  Value *StepVal;
-  if (Step) {
-    StepVal = Step-&gt;Codegen();
-    if (StepVal == 0) return 0;
-  } else {
-    // If not specified, use 1.0.
-    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
-  }
-  
-  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
-</pre>
-</div>
-
-<p>Now that the body is emitted, we compute the next value of the iteration
-variable by adding the step value, or 1.0 if it isn't present. '<tt>NextVar</tt>'
-will be the value of the loop variable on the next iteration of the loop.</p>
-
-<div class="doc_code">
-<pre>
-  // Compute the end condition.
-  Value *EndCond = End-&gt;Codegen();
-  if (EndCond == 0) return EndCond;
-  
-  // Convert condition to a bool by comparing equal to 0.0.
-  EndCond = Builder.CreateFCmpONE(EndCond, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                  "loopcond");
-</pre>
-</div>
-
-<p>Finally, we evaluate the exit value of the loop, to determine whether the
-loop should exit.  This mirrors the condition evaluation for the if/then/else
-statement.</p>
-      
-<div class="doc_code">
-<pre>
-  // Create the "after loop" block and insert it.
-  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
-  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
-  
-  // Insert the conditional branch into the end of LoopEndBB.
-  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
-  
-  // Any new code will be inserted in AfterBB.
-  Builder.SetInsertPoint(AfterBB);
-</pre>
-</div>
-
-<p>With the code for the body of the loop complete, we just need to finish up
-the control flow for it.  This code remembers the end block (for the phi node),
-then creates the block for the loop exit ("afterloop").  Based on the value of
-the exit condition, it creates a conditional branch that chooses between
-executing the loop again and exiting the loop.  Any future code is emitted in
-the "afterloop" block, so it sets the insertion position to it.</p>
-  
-<div class="doc_code">
-<pre>
-  // Add a new entry to the PHI node for the backedge.
-  Variable-&gt;addIncoming(NextVar, LoopEndBB);
-  
-  // Restore the unshadowed variable.
-  if (OldVal)
-    NamedValues[VarName] = OldVal;
-  else
-    NamedValues.erase(VarName);
-  
-  // for expr always returns 0.0.
-  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
-}
-</pre>
-</div>
-
-<p>The final code handles various cleanups: now that we have the "NextVar"
-value, we can add the incoming value to the loop PHI node.  After that, we
-remove the loop variable from the symbol table, so that it isn't in scope after
-the for loop.  Finally, code generation of the for loop always returns 0.0, so
-that is what we return from <tt>ForExprAST::Codegen</tt>.</p>
-
-<p>With this, we conclude the "adding control flow to Kaleidoscope" chapter of
-the tutorial.  In this chapter we added two control flow constructs, and used them to motivate a couple of aspects of the LLVM IR that are important for front-end implementors
-to know.  In the next chapter of our saga, we will get a bit crazier and add
-<a href="LangImpl6.html">user-defined operators</a> to our poor innocent 
-language.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-if/then/else and for expressions..  To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
-# Run
-./toy
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<div class="doc_code">
-<pre>
-#include "llvm/DerivedTypes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/TargetSelect.h"
-#include &lt;cstdio&gt;
-#include &lt;string&gt;
-#include &lt;map&gt;
-#include &lt;vector&gt;
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5,
-  
-  // control
-  tok_if = -6, tok_then = -7, tok_else = -8,
-  tok_for = -9, tok_in = -10
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    if (IdentifierStr == "if") return tok_if;
-    if (IdentifierStr == "then") return tok_then;
-    if (IdentifierStr == "else") return tok_else;
-    if (IdentifierStr == "for") return tok_for;
-    if (IdentifierStr == "in") return tok_in;
-    return tok_identifier;
-  }
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-  
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-  virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &amp;name) : Name(name) {}
-  virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-  virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector&lt;ExprAST*&gt; Args;
-public:
-  CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
-    : Callee(callee), Args(args) {}
-  virtual Value *Codegen();
-};
-
-/// IfExprAST - Expression class for if/then/else.
-class IfExprAST : public ExprAST {
-  ExprAST *Cond, *Then, *Else;
-public:
-  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
-  : Cond(cond), Then(then), Else(_else) {}
-  virtual Value *Codegen();
-};
-
-/// ForExprAST - Expression class for for/in.
-class ForExprAST : public ExprAST {
-  std::string VarName;
-  ExprAST *Start, *End, *Step, *Body;
-public:
-  ForExprAST(const std::string &amp;varname, ExprAST *start, ExprAST *end,
-             ExprAST *step, ExprAST *body)
-    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
-  virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes).
-class PrototypeAST {
-  std::string Name;
-  std::vector&lt;std::string&gt; Args;
-public:
-  PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args)
-    : Name(name), Args(args) {}
-  
-  Function *Codegen();
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-  
-  Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-  
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec &lt;= 0) return -1;
-  return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector&lt;ExprAST*&gt; Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-/// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {
-  getNextToken();  // eat the if.
-  
-  // condition.
-  ExprAST *Cond = ParseExpression();
-  if (!Cond) return 0;
-  
-  if (CurTok != tok_then)
-    return Error("expected then");
-  getNextToken();  // eat the then
-  
-  ExprAST *Then = ParseExpression();
-  if (Then == 0) return 0;
-  
-  if (CurTok != tok_else)
-    return Error("expected else");
-  
-  getNextToken();
-  
-  ExprAST *Else = ParseExpression();
-  if (!Else) return 0;
-  
-  return new IfExprAST(Cond, Then, Else);
-}
-
-/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {
-  getNextToken();  // eat the for.
-
-  if (CurTok != tok_identifier)
-    return Error("expected identifier after for");
-  
-  std::string IdName = IdentifierStr;
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '=')
-    return Error("expected '=' after for");
-  getNextToken();  // eat '='.
-  
-  
-  ExprAST *Start = ParseExpression();
-  if (Start == 0) return 0;
-  if (CurTok != ',')
-    return Error("expected ',' after for start value");
-  getNextToken();
-  
-  ExprAST *End = ParseExpression();
-  if (End == 0) return 0;
-  
-  // The step value is optional.
-  ExprAST *Step = 0;
-  if (CurTok == ',') {
-    getNextToken();
-    Step = ParseExpression();
-    if (Step == 0) return 0;
-  }
-  
-  if (CurTok != tok_in)
-    return Error("expected 'in' after for");
-  getNextToken();  // eat 'in'.
-  
-  ExprAST *Body = ParseExpression();
-  if (Body == 0) return 0;
-
-  return new ForExprAST(IdName, Start, End, Step, Body);
-}
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-///   ::= ifexpr
-///   ::= forexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  case tok_if:         return ParseIfExpr();
-  case tok_for:        return ParseForExpr();
-  }
-}
-
-/// binoprhs
-///   ::= ('+' primary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec &lt; ExprPrec)
-      return LHS;
-    
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the primary expression after the binary operator.
-    ExprAST *RHS = ParsePrimary();
-    if (!RHS) return 0;
-    
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec &lt; NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }
-}
-
-/// expression
-///   ::= primary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParsePrimary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-///   ::= id '(' id* ')'
-static PrototypeAST *ParsePrototype() {
-  if (CurTok != tok_identifier)
-    return ErrorP("Expected function name in prototype");
-
-  std::string FnName = IdentifierStr;
-  getNextToken();
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  std::vector&lt;std::string&gt; ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  return new PrototypeAST(FnName, ArgNames);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder&lt;&gt; Builder(getGlobalContext());
-static std::map&lt;std::string, Value*&gt; NamedValues;
-static FunctionPassManager *TheFPM;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-Value *NumberExprAST::Codegen() {
-  return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
-}
-
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS-&gt;Codegen();
-  Value *R = RHS-&gt;Codegen();
-  if (L == 0 || R == 0) return 0;
-  
-  switch (Op) {
-  case '+': return Builder.CreateFAdd(L, R, "addtmp");
-  case '-': return Builder.CreateFSub(L, R, "subtmp");
-  case '*': return Builder.CreateFMul(L, R, "multmp");
-  case '&lt;':
-    L = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  default: return ErrorV("invalid binary operator");
-  }
-}
-
-Value *CallExprAST::Codegen() {
-  // Look up the name in the global module table.
-  Function *CalleeF = TheModule-&gt;getFunction(Callee);
-  if (CalleeF == 0)
-    return ErrorV("Unknown function referenced");
-  
-  // If argument mismatch error.
-  if (CalleeF-&gt;arg_size() != Args.size())
-    return ErrorV("Incorrect # arguments passed");
-
-  std::vector&lt;Value*&gt; ArgsV;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]-&gt;Codegen());
-    if (ArgsV.back() == 0) return 0;
-  }
-  
-  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Value *IfExprAST::Codegen() {
-  Value *CondV = Cond-&gt;Codegen();
-  if (CondV == 0) return 0;
-  
-  // Convert condition to a bool by comparing equal to 0.0.
-  CondV = Builder.CreateFCmpONE(CondV, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                "ifcond");
-  
-  Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-  
-  // Create blocks for the then and else cases.  Insert the 'then' block at the
-  // end of the function.
-  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
-  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
-  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
-  
-  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
-  
-  // Emit then value.
-  Builder.SetInsertPoint(ThenBB);
-  
-  Value *ThenV = Then-&gt;Codegen();
-  if (ThenV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
-  ThenBB = Builder.GetInsertBlock();
-  
-  // Emit else block.
-  TheFunction-&gt;getBasicBlockList().push_back(ElseBB);
-  Builder.SetInsertPoint(ElseBB);
-  
-  Value *ElseV = Else-&gt;Codegen();
-  if (ElseV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
-  ElseBB = Builder.GetInsertBlock();
-  
-  // Emit merge block.
-  TheFunction-&gt;getBasicBlockList().push_back(MergeBB);
-  Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
-                                  "iftmp");
-  
-  PN-&gt;addIncoming(ThenV, ThenBB);
-  PN-&gt;addIncoming(ElseV, ElseBB);
-  return PN;
-}
-
-Value *ForExprAST::Codegen() {
-  // Output this as:
-  //   ...
-  //   start = startexpr
-  //   goto loop
-  // loop: 
-  //   variable = phi [start, loopheader], [nextvariable, loopend]
-  //   ...
-  //   bodyexpr
-  //   ...
-  // loopend:
-  //   step = stepexpr
-  //   nextvariable = variable + step
-  //   endcond = endexpr
-  //   br endcond, loop, endloop
-  // outloop:
-  
-  // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start-&gt;Codegen();
-  if (StartVal == 0) return 0;
-  
-  // Make the new basic block for the loop header, inserting after current
-  // block.
-  Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
-  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
-  
-  // Insert an explicit fall through from the current block to the LoopBB.
-  Builder.CreateBr(LoopBB);
-
-  // Start insertion in LoopBB.
-  Builder.SetInsertPoint(LoopBB);
-  
-  // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
-  Variable-&gt;addIncoming(StartVal, PreheaderBB);
-  
-  // Within the loop, the variable is defined equal to the PHI node.  If it
-  // shadows an existing variable, we have to restore it, so save it now.
-  Value *OldVal = NamedValues[VarName];
-  NamedValues[VarName] = Variable;
-  
-  // Emit the body of the loop.  This, like any other expr, can change the
-  // current BB.  Note that we ignore the value computed by the body, but don't
-  // allow an error.
-  if (Body-&gt;Codegen() == 0)
-    return 0;
-  
-  // Emit the step value.
-  Value *StepVal;
-  if (Step) {
-    StepVal = Step-&gt;Codegen();
-    if (StepVal == 0) return 0;
-  } else {
-    // If not specified, use 1.0.
-    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
-  }
-  
-  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
-
-  // Compute the end condition.
-  Value *EndCond = End-&gt;Codegen();
-  if (EndCond == 0) return EndCond;
-  
-  // Convert condition to a bool by comparing equal to 0.0.
-  EndCond = Builder.CreateFCmpONE(EndCond, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                  "loopcond");
-  
-  // Create the "after loop" block and insert it.
-  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
-  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
-  
-  // Insert the conditional branch into the end of LoopEndBB.
-  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
-  
-  // Any new code will be inserted in AfterBB.
-  Builder.SetInsertPoint(AfterBB);
-  
-  // Add a new entry to the PHI node for the backedge.
-  Variable-&gt;addIncoming(NextVar, LoopEndBB);
-  
-  // Restore the unshadowed variable.
-  if (OldVal)
-    NamedValues[VarName] = OldVal;
-  else
-    NamedValues.erase(VarName);
-
-  
-  // for expr always returns 0.0.
-  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
-}
-
-Function *PrototypeAST::Codegen() {
-  // Make the function type:  double(double,double) etc.
-  std::vector&lt;Type*&gt; Doubles(Args.size(),
-                             Type::getDoubleTy(getGlobalContext()));
-  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
-                                       Doubles, false);
-  
-  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-  
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F-&gt;getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F-&gt;eraseFromParent();
-    F = TheModule-&gt;getFunction(Name);
-    
-    // If F already has a body, reject this.
-    if (!F-&gt;empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-    
-    // If F took a different number of args, reject.
-    if (F-&gt;arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
-  
-  // Set names for all arguments.
-  unsigned Idx = 0;
-  for (Function::arg_iterator AI = F-&gt;arg_begin(); Idx != Args.size();
-       ++AI, ++Idx) {
-    AI-&gt;setName(Args[Idx]);
-    
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = AI;
-  }
-  
-  return F;
-}
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto-&gt;Codegen();
-  if (TheFunction == 0)
-    return 0;
-  
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  if (Value *RetVal = Body-&gt;Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    // Optimize the function.
-    TheFPM-&gt;run(*TheFunction);
-    
-    return TheFunction;
-  }
-  
-  // Error reading body, remove function.
-  TheFunction-&gt;eraseFromParent();
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static ExecutionEngine *TheExecutionEngine;
-
-static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F-&gt;Codegen()) {
-      fprintf(stderr, "Read function definition:");
-      LF-&gt;dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P-&gt;Codegen()) {
-      fprintf(stderr, "Read extern: ");
-      F-&gt;dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F-&gt;Codegen()) {
-      // JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine-&gt;getPointerToFunction(LF);
-      
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
-      fprintf(stderr, "Evaluated to %f\n", FP());
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready&gt; ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C" 
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
-  InitializeNativeTarget();
-  LLVMContext &amp;Context = getGlobalContext();
-
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['&lt;'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-
-  // Prime the first token.
-  fprintf(stderr, "ready&gt; ");
-  getNextToken();
-
-  // Make the module, which holds all the code.
-  TheModule = new Module("my cool jit", Context);
-
-  // Create the JIT.  This takes ownership of the module.
-  std::string ErrStr;
-  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&amp;ErrStr).create();
-  if (!TheExecutionEngine) {
-    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
-    exit(1);
-  }
-
-  FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  OurFPM.add(new DataLayout(*TheExecutionEngine-&gt;getDataLayout()));
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &amp;OurFPM;
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-
-  TheFPM = 0;
-
-  // Print out all of the generated code.
-  TheModule-&gt;dump();
-
-  return 0;
-}
-</pre>
-</div>
-
-<a href="LangImpl6.html">Next: Extending the language: user-defined operators</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl5.rst b/docs/tutorial/LangImpl5.rst
new file mode 100644
index 0000000000..8405e1a917
--- /dev/null
+++ b/docs/tutorial/LangImpl5.rst
@@ -0,0 +1,1609 @@
+==================================================
+Kaleidoscope: Extending the Language: Control Flow
+==================================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_
+
+Chapter 5 Introduction
+======================
+
+Welcome to Chapter 5 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. Parts 1-4 described the implementation of
+the simple Kaleidoscope language and included support for generating
+LLVM IR, followed by optimizations and a JIT compiler. Unfortunately, as
+presented, Kaleidoscope is mostly useless: it has no control flow other
+than call and return. This means that you can't have conditional
+branches in the code, significantly limiting its power. In this episode
+of "build that compiler", we'll extend Kaleidoscope to have an
+if/then/else expression plus a simple 'for' loop.
+
+If/Then/Else
+============
+
+Extending Kaleidoscope to support if/then/else is quite straightforward.
+It basically requires adding support for this "new" concept to the
+lexer, parser, AST, and LLVM code emitter. This example is nice, because
+it shows how easy it is to "grow" a language over time, incrementally
+extending it as new ideas are discovered.
+
+Before we get going on "how" we add this extension, lets talk about
+"what" we want. The basic idea is that we want to be able to write this
+sort of thing:
+
+::
+
+    def fib(x)
+      if x < 3 then
+        1
+      else
+        fib(x-1)+fib(x-2);
+
+In Kaleidoscope, every construct is an expression: there are no
+statements. As such, the if/then/else expression needs to return a value
+like any other. Since we're using a mostly functional form, we'll have
+it evaluate its conditional, then return the 'then' or 'else' value
+based on how the condition was resolved. This is very similar to the C
+"?:" expression.
+
+The semantics of the if/then/else expression is that it evaluates the
+condition to a boolean equality value: 0.0 is considered to be false and
+everything else is considered to be true. If the condition is true, the
+first subexpression is evaluated and returned, if the condition is
+false, the second subexpression is evaluated and returned. Since
+Kaleidoscope allows side-effects, this behavior is important to nail
+down.
+
+Now that we know what we "want", lets break this down into its
+constituent pieces.
+
+Lexer Extensions for If/Then/Else
+---------------------------------
+
+The lexer extensions are straightforward. First we add new enum values
+for the relevant tokens:
+
+.. code-block:: c++
+
+      // control
+      tok_if = -6, tok_then = -7, tok_else = -8,
+
+Once we have that, we recognize the new keywords in the lexer. This is
+pretty simple stuff:
+
+.. code-block:: c++
+
+        ...
+        if (IdentifierStr == "def") return tok_def;
+        if (IdentifierStr == "extern") return tok_extern;
+        if (IdentifierStr == "if") return tok_if;
+        if (IdentifierStr == "then") return tok_then;
+        if (IdentifierStr == "else") return tok_else;
+        return tok_identifier;
+
+AST Extensions for If/Then/Else
+-------------------------------
+
+To represent the new expression we add a new AST node for it:
+
+.. code-block:: c++
+
+    /// IfExprAST - Expression class for if/then/else.
+    class IfExprAST : public ExprAST {
+      ExprAST *Cond, *Then, *Else;
+    public:
+      IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+        : Cond(cond), Then(then), Else(_else) {}
+      virtual Value *Codegen();
+    };
+
+The AST node just has pointers to the various subexpressions.
+
+Parser Extensions for If/Then/Else
+----------------------------------
+
+Now that we have the relevant tokens coming from the lexer and we have
+the AST node to build, our parsing logic is relatively straightforward.
+First we define a new parsing function:
+
+.. code-block:: c++
+
+    /// ifexpr ::= 'if' expression 'then' expression 'else' expression
+    static ExprAST *ParseIfExpr() {
+      getNextToken();  // eat the if.
+
+      // condition.
+      ExprAST *Cond = ParseExpression();
+      if (!Cond) return 0;
+
+      if (CurTok != tok_then)
+        return Error("expected then");
+      getNextToken();  // eat the then
+
+      ExprAST *Then = ParseExpression();
+      if (Then == 0) return 0;
+
+      if (CurTok != tok_else)
+        return Error("expected else");
+
+      getNextToken();
+
+      ExprAST *Else = ParseExpression();
+      if (!Else) return 0;
+
+      return new IfExprAST(Cond, Then, Else);
+    }
+
+Next we hook it up as a primary expression:
+
+.. code-block:: c++
+
+    static ExprAST *ParsePrimary() {
+      switch (CurTok) {
+      default: return Error("unknown token when expecting an expression");
+      case tok_identifier: return ParseIdentifierExpr();
+      case tok_number:     return ParseNumberExpr();
+      case '(':            return ParseParenExpr();
+      case tok_if:         return ParseIfExpr();
+      }
+    }
+
+LLVM IR for If/Then/Else
+------------------------
+
+Now that we have it parsing and building the AST, the final piece is
+adding LLVM code generation support. This is the most interesting part
+of the if/then/else example, because this is where it starts to
+introduce new concepts. All of the code above has been thoroughly
+described in previous chapters.
+
+To motivate the code we want to produce, lets take a look at a simple
+example. Consider:
+
+::
+
+    extern foo();
+    extern bar();
+    def baz(x) if x then foo() else bar();
+
+If you disable optimizations, the code you'll (soon) get from
+Kaleidoscope looks like this:
+
+.. code-block:: llvm
+
+    declare double @foo()
+
+    declare double @bar()
+
+    define double @baz(double %x) {
+    entry:
+      %ifcond = fcmp one double %x, 0.000000e+00
+      br i1 %ifcond, label %then, label %else
+
+    then:       ; preds = %entry
+      %calltmp = call double @foo()
+      br label %ifcont
+
+    else:       ; preds = %entry
+      %calltmp1 = call double @bar()
+      br label %ifcont
+
+    ifcont:     ; preds = %else, %then
+      %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
+      ret double %iftmp
+    }
+
+To visualize the control flow graph, you can use a nifty feature of the
+LLVM '`opt <http://llvm.org/cmds/opt.html>`_' tool. If you put this LLVM
+IR into "t.ll" and run "``llvm-as < t.ll | opt -analyze -view-cfg``", `a
+window will pop up <../ProgrammersManual.html#ViewGraph>`_ and you'll
+see this graph:
+
+.. figure:: LangImpl5-cfg.png
+   :align: center
+   :alt: Example CFG
+
+   Example CFG
+
+Another way to get this is to call "``F->viewCFG()``" or
+"``F->viewCFGOnly()``" (where F is a "``Function*``") either by
+inserting actual calls into the code and recompiling or by calling these
+in the debugger. LLVM has many nice features for visualizing various
+graphs.
+
+Getting back to the generated code, it is fairly simple: the entry block
+evaluates the conditional expression ("x" in our case here) and compares
+the result to 0.0 with the "``fcmp one``" instruction ('one' is "Ordered
+and Not Equal"). Based on the result of this expression, the code jumps
+to either the "then" or "else" blocks, which contain the expressions for
+the true/false cases.
+
+Once the then/else blocks are finished executing, they both branch back
+to the 'ifcont' block to execute the code that happens after the
+if/then/else. In this case the only thing left to do is to return to the
+caller of the function. The question then becomes: how does the code
+know which expression to return?
+
+The answer to this question involves an important SSA operation: the
+`Phi
+operation <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_.
+If you're not familiar with SSA, `the wikipedia
+article <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_
+is a good introduction and there are various other introductions to it
+available on your favorite search engine. The short version is that
+"execution" of the Phi operation requires "remembering" which block
+control came from. The Phi operation takes on the value corresponding to
+the input control block. In this case, if control comes in from the
+"then" block, it gets the value of "calltmp". If control comes from the
+"else" block, it gets the value of "calltmp1".
+
+At this point, you are probably starting to think "Oh no! This means my
+simple and elegant front-end will have to start generating SSA form in
+order to use LLVM!". Fortunately, this is not the case, and we strongly
+advise *not* implementing an SSA construction algorithm in your
+front-end unless there is an amazingly good reason to do so. In
+practice, there are two sorts of values that float around in code
+written for your average imperative programming language that might need
+Phi nodes:
+
+#. Code that involves user variables: ``x = 1; x = x + 1;``
+#. Values that are implicit in the structure of your AST, such as the
+   Phi node in this case.
+
+In `Chapter 7 <LangImpl7.html>`_ of this tutorial ("mutable variables"),
+we'll talk about #1 in depth. For now, just believe me that you don't
+need SSA construction to handle this case. For #2, you have the choice
+of using the techniques that we will describe for #1, or you can insert
+Phi nodes directly, if convenient. In this case, it is really really
+easy to generate the Phi node, so we choose to do it directly.
+
+Okay, enough of the motivation and overview, lets generate code!
+
+Code Generation for If/Then/Else
+--------------------------------
+
+In order to generate code for this, we implement the ``Codegen`` method
+for ``IfExprAST``:
+
+.. code-block:: c++
+
+    Value *IfExprAST::Codegen() {
+      Value *CondV = Cond->Codegen();
+      if (CondV == 0) return 0;
+
+      // Convert condition to a bool by comparing equal to 0.0.
+      CondV = Builder.CreateFCmpONE(CondV,
+                                  ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                    "ifcond");
+
+This code is straightforward and similar to what we saw before. We emit
+the expression for the condition, then compare that value to zero to get
+a truth value as a 1-bit (bool) value.
+
+.. code-block:: c++
+
+      Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+      // Create blocks for the then and else cases.  Insert the 'then' block at the
+      // end of the function.
+      BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+      BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+      BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+      Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+This code creates the basic blocks that are related to the if/then/else
+statement, and correspond directly to the blocks in the example above.
+The first line gets the current Function object that is being built. It
+gets this by asking the builder for the current BasicBlock, and asking
+that block for its "parent" (the function it is currently embedded
+into).
+
+Once it has that, it creates three blocks. Note that it passes
+"TheFunction" into the constructor for the "then" block. This causes the
+constructor to automatically insert the new block into the end of the
+specified function. The other two blocks are created, but aren't yet
+inserted into the function.
+
+Once the blocks are created, we can emit the conditional branch that
+chooses between them. Note that creating new blocks does not implicitly
+affect the IRBuilder, so it is still inserting into the block that the
+condition went into. Also note that it is creating a branch to the
+"then" block and the "else" block, even though the "else" block isn't
+inserted into the function yet. This is all ok: it is the standard way
+that LLVM supports forward references.
+
+.. code-block:: c++
+
+      // Emit then value.
+      Builder.SetInsertPoint(ThenBB);
+
+      Value *ThenV = Then->Codegen();
+      if (ThenV == 0) return 0;
+
+      Builder.CreateBr(MergeBB);
+      // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+      ThenBB = Builder.GetInsertBlock();
+
+After the conditional branch is inserted, we move the builder to start
+inserting into the "then" block. Strictly speaking, this call moves the
+insertion point to be at the end of the specified block. However, since
+the "then" block is empty, it also starts out by inserting at the
+beginning of the block. :)
+
+Once the insertion point is set, we recursively codegen the "then"
+expression from the AST. To finish off the "then" block, we create an
+unconditional branch to the merge block. One interesting (and very
+important) aspect of the LLVM IR is that it `requires all basic blocks
+to be "terminated" <../LangRef.html#functionstructure>`_ with a `control
+flow instruction <../LangRef.html#terminators>`_ such as return or
+branch. This means that all control flow, *including fall throughs* must
+be made explicit in the LLVM IR. If you violate this rule, the verifier
+will emit an error.
+
+The final line here is quite subtle, but is very important. The basic
+issue is that when we create the Phi node in the merge block, we need to
+set up the block/value pairs that indicate how the Phi will work.
+Importantly, the Phi node expects to have an entry for each predecessor
+of the block in the CFG. Why then, are we getting the current block when
+we just set it to ThenBB 5 lines above? The problem is that the "Then"
+expression may actually itself change the block that the Builder is
+emitting into if, for example, it contains a nested "if/then/else"
+expression. Because calling Codegen recursively could arbitrarily change
+the notion of the current block, we are required to get an up-to-date
+value for code that will set up the Phi node.
+
+.. code-block:: c++
+
+      // Emit else block.
+      TheFunction->getBasicBlockList().push_back(ElseBB);
+      Builder.SetInsertPoint(ElseBB);
+
+      Value *ElseV = Else->Codegen();
+      if (ElseV == 0) return 0;
+
+      Builder.CreateBr(MergeBB);
+      // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+      ElseBB = Builder.GetInsertBlock();
+
+Code generation for the 'else' block is basically identical to codegen
+for the 'then' block. The only significant difference is the first line,
+which adds the 'else' block to the function. Recall previously that the
+'else' block was created, but not added to the function. Now that the
+'then' and 'else' blocks are emitted, we can finish up with the merge
+code:
+
+.. code-block:: c++
+
+      // Emit merge block.
+      TheFunction->getBasicBlockList().push_back(MergeBB);
+      Builder.SetInsertPoint(MergeBB);
+      PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
+                                      "iftmp");
+
+      PN->addIncoming(ThenV, ThenBB);
+      PN->addIncoming(ElseV, ElseBB);
+      return PN;
+    }
+
+The first two lines here are now familiar: the first adds the "merge"
+block to the Function object (it was previously floating, like the else
+block above). The second block changes the insertion point so that newly
+created code will go into the "merge" block. Once that is done, we need
+to create the PHI node and set up the block/value pairs for the PHI.
+
+Finally, the CodeGen function returns the phi node as the value computed
+by the if/then/else expression. In our example above, this returned
+value will feed into the code for the top-level function, which will
+create the return instruction.
+
+Overall, we now have the ability to execute conditional code in
+Kaleidoscope. With this extension, Kaleidoscope is a fairly complete
+language that can calculate a wide variety of numeric functions. Next up
+we'll add another useful expression that is familiar from non-functional
+languages...
+
+'for' Loop Expression
+=====================
+
+Now that we know how to add basic control flow constructs to the
+language, we have the tools to add more powerful things. Lets add
+something more aggressive, a 'for' expression:
+
+::
+
+     extern putchard(char)
+     def printstar(n)
+       for i = 1, i < n, 1.0 in
+         putchard(42);  # ascii 42 = '*'
+
+     # print 100 '*' characters
+     printstar(100);
+
+This expression defines a new variable ("i" in this case) which iterates
+from a starting value, while the condition ("i < n" in this case) is
+true, incrementing by an optional step value ("1.0" in this case). If
+the step value is omitted, it defaults to 1.0. While the loop is true,
+it executes its body expression. Because we don't have anything better
+to return, we'll just define the loop as always returning 0.0. In the
+future when we have mutable variables, it will get more useful.
+
+As before, lets talk about the changes that we need to Kaleidoscope to
+support this.
+
+Lexer Extensions for the 'for' Loop
+-----------------------------------
+
+The lexer extensions are the same sort of thing as for if/then/else:
+
+.. code-block:: c++
+
+      ... in enum Token ...
+      // control
+      tok_if = -6, tok_then = -7, tok_else = -8,
+      tok_for = -9, tok_in = -10
+
+      ... in gettok ...
+      if (IdentifierStr == "def") return tok_def;
+      if (IdentifierStr == "extern") return tok_extern;
+      if (IdentifierStr == "if") return tok_if;
+      if (IdentifierStr == "then") return tok_then;
+      if (IdentifierStr == "else") return tok_else;
+      if (IdentifierStr == "for") return tok_for;
+      if (IdentifierStr == "in") return tok_in;
+      return tok_identifier;
+
+AST Extensions for the 'for' Loop
+---------------------------------
+
+The AST node is just as simple. It basically boils down to capturing the
+variable name and the constituent expressions in the node.
+
+.. code-block:: c++
+
+    /// ForExprAST - Expression class for for/in.
+    class ForExprAST : public ExprAST {
+      std::string VarName;
+      ExprAST *Start, *End, *Step, *Body;
+    public:
+      ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+                 ExprAST *step, ExprAST *body)
+        : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+      virtual Value *Codegen();
+    };
+
+Parser Extensions for the 'for' Loop
+------------------------------------
+
+The parser code is also fairly standard. The only interesting thing here
+is handling of the optional step value. The parser code handles it by
+checking to see if the second comma is present. If not, it sets the step
+value to null in the AST node:
+
+.. code-block:: c++
+
+    /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+    static ExprAST *ParseForExpr() {
+      getNextToken();  // eat the for.
+
+      if (CurTok != tok_identifier)
+        return Error("expected identifier after for");
+
+      std::string IdName = IdentifierStr;
+      getNextToken();  // eat identifier.
+
+      if (CurTok != '=')
+        return Error("expected '=' after for");
+      getNextToken();  // eat '='.
+
+
+      ExprAST *Start = ParseExpression();
+      if (Start == 0) return 0;
+      if (CurTok != ',')
+        return Error("expected ',' after for start value");
+      getNextToken();
+
+      ExprAST *End = ParseExpression();
+      if (End == 0) return 0;
+
+      // The step value is optional.
+      ExprAST *Step = 0;
+      if (CurTok == ',') {
+        getNextToken();
+        Step = ParseExpression();
+        if (Step == 0) return 0;
+      }
+
+      if (CurTok != tok_in)
+        return Error("expected 'in' after for");
+      getNextToken();  // eat 'in'.
+
+      ExprAST *Body = ParseExpression();
+      if (Body == 0) return 0;
+
+      return new ForExprAST(IdName, Start, End, Step, Body);
+    }
+
+LLVM IR for the 'for' Loop
+--------------------------
+
+Now we get to the good part: the LLVM IR we want to generate for this
+thing. With the simple example above, we get this LLVM IR (note that
+this dump is generated with optimizations disabled for clarity):
+
+.. code-block:: llvm
+
+    declare double @putchard(double)
+
+    define double @printstar(double %n) {
+    entry:
+      ; initial value = 1.0 (inlined into phi)
+      br label %loop
+
+    loop:       ; preds = %loop, %entry
+      %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
+      ; body
+      %calltmp = call double @putchard(double 4.200000e+01)
+      ; increment
+      %nextvar = fadd double %i, 1.000000e+00
+
+      ; termination test
+      %cmptmp = fcmp ult double %i, %n
+      %booltmp = uitofp i1 %cmptmp to double
+      %loopcond = fcmp one double %booltmp, 0.000000e+00
+      br i1 %loopcond, label %loop, label %afterloop
+
+    afterloop:      ; preds = %loop
+      ; loop always returns 0.0
+      ret double 0.000000e+00
+    }
+
+This loop contains all the same constructs we saw before: a phi node,
+several expressions, and some basic blocks. Lets see how this fits
+together.
+
+Code Generation for the 'for' Loop
+----------------------------------
+
+The first part of Codegen is very simple: we just output the start
+expression for the loop value:
+
+.. code-block:: c++
+
+    Value *ForExprAST::Codegen() {
+      // Emit the start code first, without 'variable' in scope.
+      Value *StartVal = Start->Codegen();
+      if (StartVal == 0) return 0;
+
+With this out of the way, the next step is to set up the LLVM basic
+block for the start of the loop body. In the case above, the whole loop
+body is one block, but remember that the body code itself could consist
+of multiple blocks (e.g. if it contains an if/then/else or a for/in
+expression).
+
+.. code-block:: c++
+
+      // Make the new basic block for the loop header, inserting after current
+      // block.
+      Function *TheFunction = Builder.GetInsertBlock()->getParent();
+      BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+      BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+      // Insert an explicit fall through from the current block to the LoopBB.
+      Builder.CreateBr(LoopBB);
+
+This code is similar to what we saw for if/then/else. Because we will
+need it to create the Phi node, we remember the block that falls through
+into the loop. Once we have that, we create the actual block that starts
+the loop and create an unconditional branch for the fall-through between
+the two blocks.
+
+.. code-block:: c++
+
+      // Start insertion in LoopBB.
+      Builder.SetInsertPoint(LoopBB);
+
+      // Start the PHI node with an entry for Start.
+      PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
+      Variable->addIncoming(StartVal, PreheaderBB);
+
+Now that the "preheader" for the loop is set up, we switch to emitting
+code for the loop body. To begin with, we move the insertion point and
+create the PHI node for the loop induction variable. Since we already
+know the incoming value for the starting value, we add it to the Phi
+node. Note that the Phi will eventually get a second value for the
+backedge, but we can't set it up yet (because it doesn't exist!).
+
+.. code-block:: c++
+
+      // Within the loop, the variable is defined equal to the PHI node.  If it
+      // shadows an existing variable, we have to restore it, so save it now.
+      Value *OldVal = NamedValues[VarName];
+      NamedValues[VarName] = Variable;
+
+      // Emit the body of the loop.  This, like any other expr, can change the
+      // current BB.  Note that we ignore the value computed by the body, but don't
+      // allow an error.
+      if (Body->Codegen() == 0)
+        return 0;
+
+Now the code starts to get more interesting. Our 'for' loop introduces a
+new variable to the symbol table. This means that our symbol table can
+now contain either function arguments or loop variables. To handle this,
+before we codegen the body of the loop, we add the loop variable as the
+current value for its name. Note that it is possible that there is a
+variable of the same name in the outer scope. It would be easy to make
+this an error (emit an error and return null if there is already an
+entry for VarName) but we choose to allow shadowing of variables. In
+order to handle this correctly, we remember the Value that we are
+potentially shadowing in ``OldVal`` (which will be null if there is no
+shadowed variable).
+
+Once the loop variable is set into the symbol table, the code
+recursively codegen's the body. This allows the body to use the loop
+variable: any references to it will naturally find it in the symbol
+table.
+
+.. code-block:: c++
+
+      // Emit the step value.
+      Value *StepVal;
+      if (Step) {
+        StepVal = Step->Codegen();
+        if (StepVal == 0) return 0;
+      } else {
+        // If not specified, use 1.0.
+        StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+      }
+
+      Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
+
+Now that the body is emitted, we compute the next value of the iteration
+variable by adding the step value, or 1.0 if it isn't present.
+'``NextVar``' will be the value of the loop variable on the next
+iteration of the loop.
+
+.. code-block:: c++
+
+      // Compute the end condition.
+      Value *EndCond = End->Codegen();
+      if (EndCond == 0) return EndCond;
+
+      // Convert condition to a bool by comparing equal to 0.0.
+      EndCond = Builder.CreateFCmpONE(EndCond,
+                                  ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                      "loopcond");
+
+Finally, we evaluate the exit value of the loop, to determine whether
+the loop should exit. This mirrors the condition evaluation for the
+if/then/else statement.
+
+.. code-block:: c++
+
+      // Create the "after loop" block and insert it.
+      BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+      BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+      // Insert the conditional branch into the end of LoopEndBB.
+      Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+      // Any new code will be inserted in AfterBB.
+      Builder.SetInsertPoint(AfterBB);
+
+With the code for the body of the loop complete, we just need to finish
+up the control flow for it. This code remembers the end block (for the
+phi node), then creates the block for the loop exit ("afterloop"). Based
+on the value of the exit condition, it creates a conditional branch that
+chooses between executing the loop again and exiting the loop. Any
+future code is emitted in the "afterloop" block, so it sets the
+insertion position to it.
+
+.. code-block:: c++
+
+      // Add a new entry to the PHI node for the backedge.
+      Variable->addIncoming(NextVar, LoopEndBB);
+
+      // Restore the unshadowed variable.
+      if (OldVal)
+        NamedValues[VarName] = OldVal;
+      else
+        NamedValues.erase(VarName);
+
+      // for expr always returns 0.0.
+      return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+    }
+
+The final code handles various cleanups: now that we have the "NextVar"
+value, we can add the incoming value to the loop PHI node. After that,
+we remove the loop variable from the symbol table, so that it isn't in
+scope after the for loop. Finally, code generation of the for loop
+always returns 0.0, so that is what we return from
+``ForExprAST::Codegen``.
+
+With this, we conclude the "adding control flow to Kaleidoscope" chapter
+of the tutorial. In this chapter we added two control flow constructs,
+and used them to motivate a couple of aspects of the LLVM IR that are
+important for front-end implementors to know. In the next chapter of our
+saga, we will get a bit crazier and add `user-defined
+operators <LangImpl6.html>`_ to our poor innocent language.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the if/then/else and for expressions.. To build this example, use:
+
+.. code-block:: bash
+
+    # Compile
+    clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+    # Run
+    ./toy
+
+Here is the code:
+
+.. code-block:: c++
+
+    #include "llvm/DerivedTypes.h"
+    #include "llvm/ExecutionEngine/ExecutionEngine.h"
+    #include "llvm/ExecutionEngine/JIT.h"
+    #include "llvm/IRBuilder.h"
+    #include "llvm/LLVMContext.h"
+    #include "llvm/Module.h"
+    #include "llvm/PassManager.h"
+    #include "llvm/Analysis/Verifier.h"
+    #include "llvm/Analysis/Passes.h"
+    #include "llvm/DataLayout.h"
+    #include "llvm/Transforms/Scalar.h"
+    #include "llvm/Support/TargetSelect.h"
+    #include <cstdio>
+    #include <string>
+    #include <map>
+    #include <vector>
+    using namespace llvm;
+
+    //===----------------------------------------------------------------------===//
+    // Lexer
+    //===----------------------------------------------------------------------===//
+
+    // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+    // of these for known things.
+    enum Token {
+      tok_eof = -1,
+
+      // commands
+      tok_def = -2, tok_extern = -3,
+
+      // primary
+      tok_identifier = -4, tok_number = -5,
+
+      // control
+      tok_if = -6, tok_then = -7, tok_else = -8,
+      tok_for = -9, tok_in = -10
+    };
+
+    static std::string IdentifierStr;  // Filled in if tok_identifier
+    static double NumVal;              // Filled in if tok_number
+
+    /// gettok - Return the next token from standard input.
+    static int gettok() {
+      static int LastChar = ' ';
+
+      // Skip any whitespace.
+      while (isspace(LastChar))
+        LastChar = getchar();
+
+      if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+        IdentifierStr = LastChar;
+        while (isalnum((LastChar = getchar())))
+          IdentifierStr += LastChar;
+
+        if (IdentifierStr == "def") return tok_def;
+        if (IdentifierStr == "extern") return tok_extern;
+        if (IdentifierStr == "if") return tok_if;
+        if (IdentifierStr == "then") return tok_then;
+        if (IdentifierStr == "else") return tok_else;
+        if (IdentifierStr == "for") return tok_for;
+        if (IdentifierStr == "in") return tok_in;
+        return tok_identifier;
+      }
+
+      if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+        std::string NumStr;
+        do {
+          NumStr += LastChar;
+          LastChar = getchar();
+        } while (isdigit(LastChar) || LastChar == '.');
+
+        NumVal = strtod(NumStr.c_str(), 0);
+        return tok_number;
+      }
+
+      if (LastChar == '#') {
+        // Comment until end of line.
+        do LastChar = getchar();
+        while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+        if (LastChar != EOF)
+          return gettok();
+      }
+
+      // Check for end of file.  Don't eat the EOF.
+      if (LastChar == EOF)
+        return tok_eof;
+
+      // Otherwise, just return the character as its ascii value.
+      int ThisChar = LastChar;
+      LastChar = getchar();
+      return ThisChar;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Abstract Syntax Tree (aka Parse Tree)
+    //===----------------------------------------------------------------------===//
+
+    /// ExprAST - Base class for all expression nodes.
+    class ExprAST {
+    public:
+      virtual ~ExprAST() {}
+      virtual Value *Codegen() = 0;
+    };
+
+    /// NumberExprAST - Expression class for numeric literals like "1.0".
+    class NumberExprAST : public ExprAST {
+      double Val;
+    public:
+      NumberExprAST(double val) : Val(val) {}
+      virtual Value *Codegen();
+    };
+
+    /// VariableExprAST - Expression class for referencing a variable, like "a".
+    class VariableExprAST : public ExprAST {
+      std::string Name;
+    public:
+      VariableExprAST(const std::string &name) : Name(name) {}
+      virtual Value *Codegen();
+    };
+
+    /// BinaryExprAST - Expression class for a binary operator.
+    class BinaryExprAST : public ExprAST {
+      char Op;
+      ExprAST *LHS, *RHS;
+    public:
+      BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+        : Op(op), LHS(lhs), RHS(rhs) {}
+      virtual Value *Codegen();
+    };
+
+    /// CallExprAST - Expression class for function calls.
+    class CallExprAST : public ExprAST {
+      std::string Callee;
+      std::vector<ExprAST*> Args;
+    public:
+      CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+        : Callee(callee), Args(args) {}
+      virtual Value *Codegen();
+    };
+
+    /// IfExprAST - Expression class for if/then/else.
+    class IfExprAST : public ExprAST {
+      ExprAST *Cond, *Then, *Else;
+    public:
+      IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+      : Cond(cond), Then(then), Else(_else) {}
+      virtual Value *Codegen();
+    };
+
+    /// ForExprAST - Expression class for for/in.
+    class ForExprAST : public ExprAST {
+      std::string VarName;
+      ExprAST *Start, *End, *Step, *Body;
+    public:
+      ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+                 ExprAST *step, ExprAST *body)
+        : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+      virtual Value *Codegen();
+    };
+
+    /// PrototypeAST - This class represents the "prototype" for a function,
+    /// which captures its name, and its argument names (thus implicitly the number
+    /// of arguments the function takes).
+    class PrototypeAST {
+      std::string Name;
+      std::vector<std::string> Args;
+    public:
+      PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+        : Name(name), Args(args) {}
+
+      Function *Codegen();
+    };
+
+    /// FunctionAST - This class represents a function definition itself.
+    class FunctionAST {
+      PrototypeAST *Proto;
+      ExprAST *Body;
+    public:
+      FunctionAST(PrototypeAST *proto, ExprAST *body)
+        : Proto(proto), Body(body) {}
+
+      Function *Codegen();
+    };
+
+    //===----------------------------------------------------------------------===//
+    // Parser
+    //===----------------------------------------------------------------------===//
+
+    /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+    /// token the parser is looking at.  getNextToken reads another token from the
+    /// lexer and updates CurTok with its results.
+    static int CurTok;
+    static int getNextToken() {
+      return CurTok = gettok();
+    }
+
+    /// BinopPrecedence - This holds the precedence for each binary operator that is
+    /// defined.
+    static std::map<char, int> BinopPrecedence;
+
+    /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+    static int GetTokPrecedence() {
+      if (!isascii(CurTok))
+        return -1;
+
+      // Make sure it's a declared binop.
+      int TokPrec = BinopPrecedence[CurTok];
+      if (TokPrec <= 0) return -1;
+      return TokPrec;
+    }
+
+    /// Error* - These are little helper functions for error handling.
+    ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+    PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+    FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+    static ExprAST *ParseExpression();
+
+    /// identifierexpr
+    ///   ::= identifier
+    ///   ::= identifier '(' expression* ')'
+    static ExprAST *ParseIdentifierExpr() {
+      std::string IdName = IdentifierStr;
+
+      getNextToken();  // eat identifier.
+
+      if (CurTok != '(') // Simple variable ref.
+        return new VariableExprAST(IdName);
+
+      // Call.
+      getNextToken();  // eat (
+      std::vector<ExprAST*> Args;
+      if (CurTok != ')') {
+        while (1) {
+          ExprAST *Arg = ParseExpression();
+          if (!Arg) return 0;
+          Args.push_back(Arg);
+
+          if (CurTok == ')') break;
+
+          if (CurTok != ',')
+            return Error("Expected ')' or ',' in argument list");
+          getNextToken();
+        }
+      }
+
+      // Eat the ')'.
+      getNextToken();
+
+      return new CallExprAST(IdName, Args);
+    }
+
+    /// numberexpr ::= number
+    static ExprAST *ParseNumberExpr() {
+      ExprAST *Result = new NumberExprAST(NumVal);
+      getNextToken(); // consume the number
+      return Result;
+    }
+
+    /// parenexpr ::= '(' expression ')'
+    static ExprAST *ParseParenExpr() {
+      getNextToken();  // eat (.
+      ExprAST *V = ParseExpression();
+      if (!V) return 0;
+
+      if (CurTok != ')')
+        return Error("expected ')'");
+      getNextToken();  // eat ).
+      return V;
+    }
+
+    /// ifexpr ::= 'if' expression 'then' expression 'else' expression
+    static ExprAST *ParseIfExpr() {
+      getNextToken();  // eat the if.
+
+      // condition.
+      ExprAST *Cond = ParseExpression();
+      if (!Cond) return 0;
+
+      if (CurTok != tok_then)
+        return Error("expected then");
+      getNextToken();  // eat the then
+
+      ExprAST *Then = ParseExpression();
+      if (Then == 0) return 0;
+
+      if (CurTok != tok_else)
+        return Error("expected else");
+
+      getNextToken();
+
+      ExprAST *Else = ParseExpression();
+      if (!Else) return 0;
+
+      return new IfExprAST(Cond, Then, Else);
+    }
+
+    /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+    static ExprAST *ParseForExpr() {
+      getNextToken();  // eat the for.
+
+      if (CurTok != tok_identifier)
+        return Error("expected identifier after for");
+
+      std::string IdName = IdentifierStr;
+      getNextToken();  // eat identifier.
+
+      if (CurTok != '=')
+        return Error("expected '=' after for");
+      getNextToken();  // eat '='.
+
+
+      ExprAST *Start = ParseExpression();
+      if (Start == 0) return 0;
+      if (CurTok != ',')
+        return Error("expected ',' after for start value");
+      getNextToken();
+
+      ExprAST *End = ParseExpression();
+      if (End == 0) return 0;
+
+      // The step value is optional.
+      ExprAST *Step = 0;
+      if (CurTok == ',') {
+        getNextToken();
+        Step = ParseExpression();
+        if (Step == 0) return 0;
+      }
+
+      if (CurTok != tok_in)
+        return Error("expected 'in' after for");
+      getNextToken();  // eat 'in'.
+
+      ExprAST *Body = ParseExpression();
+      if (Body == 0) return 0;
+
+      return new ForExprAST(IdName, Start, End, Step, Body);
+    }
+
+    /// primary
+    ///   ::= identifierexpr
+    ///   ::= numberexpr
+    ///   ::= parenexpr
+    ///   ::= ifexpr
+    ///   ::= forexpr
+    static ExprAST *ParsePrimary() {
+      switch (CurTok) {
+      default: return Error("unknown token when expecting an expression");
+      case tok_identifier: return ParseIdentifierExpr();
+      case tok_number:     return ParseNumberExpr();
+      case '(':            return ParseParenExpr();
+      case tok_if:         return ParseIfExpr();
+      case tok_for:        return ParseForExpr();
+      }
+    }
+
+    /// binoprhs
+    ///   ::= ('+' primary)*
+    static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+      // If this is a binop, find its precedence.
+      while (1) {
+        int TokPrec = GetTokPrecedence();
+
+        // If this is a binop that binds at least as tightly as the current binop,
+        // consume it, otherwise we are done.
+        if (TokPrec < ExprPrec)
+          return LHS;
+
+        // Okay, we know this is a binop.
+        int BinOp = CurTok;
+        getNextToken();  // eat binop
+
+        // Parse the primary expression after the binary operator.
+        ExprAST *RHS = ParsePrimary();
+        if (!RHS) return 0;
+
+        // If BinOp binds less tightly with RHS than the operator after RHS, let
+        // the pending operator take RHS as its LHS.
+        int NextPrec = GetTokPrecedence();
+        if (TokPrec < NextPrec) {
+          RHS = ParseBinOpRHS(TokPrec+1, RHS);
+          if (RHS == 0) return 0;
+        }
+
+        // Merge LHS/RHS.
+        LHS = new BinaryExprAST(BinOp, LHS, RHS);
+      }
+    }
+
+    /// expression
+    ///   ::= primary binoprhs
+    ///
+    static ExprAST *ParseExpression() {
+      ExprAST *LHS = ParsePrimary();
+      if (!LHS) return 0;
+
+      return ParseBinOpRHS(0, LHS);
+    }
+
+    /// prototype
+    ///   ::= id '(' id* ')'
+    static PrototypeAST *ParsePrototype() {
+      if (CurTok != tok_identifier)
+        return ErrorP("Expected function name in prototype");
+
+      std::string FnName = IdentifierStr;
+      getNextToken();
+
+      if (CurTok != '(')
+        return ErrorP("Expected '(' in prototype");
+
+      std::vector<std::string> ArgNames;
+      while (getNextToken() == tok_identifier)
+        ArgNames.push_back(IdentifierStr);
+      if (CurTok != ')')
+        return ErrorP("Expected ')' in prototype");
+
+      // success.
+      getNextToken();  // eat ')'.
+
+      return new PrototypeAST(FnName, ArgNames);
+    }
+
+    /// definition ::= 'def' prototype expression
+    static FunctionAST *ParseDefinition() {
+      getNextToken();  // eat def.
+      PrototypeAST *Proto = ParsePrototype();
+      if (Proto == 0) return 0;
+
+      if (ExprAST *E = ParseExpression())
+        return new FunctionAST(Proto, E);
+      return 0;
+    }
+
+    /// toplevelexpr ::= expression
+    static FunctionAST *ParseTopLevelExpr() {
+      if (ExprAST *E = ParseExpression()) {
+        // Make an anonymous proto.
+        PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+        return new FunctionAST(Proto, E);
+      }
+      return 0;
+    }
+
+    /// external ::= 'extern' prototype
+    static PrototypeAST *ParseExtern() {
+      getNextToken();  // eat extern.
+      return ParsePrototype();
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Code Generation
+    //===----------------------------------------------------------------------===//
+
+    static Module *TheModule;
+    static IRBuilder<> Builder(getGlobalContext());
+    static std::map<std::string, Value*> NamedValues;
+    static FunctionPassManager *TheFPM;
+
+    Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+    Value *NumberExprAST::Codegen() {
+      return ConstantFP::get(getGlobalContext(), APFloat(Val));
+    }
+
+    Value *VariableExprAST::Codegen() {
+      // Look this variable up in the function.
+      Value *V = NamedValues[Name];
+      return V ? V : ErrorV("Unknown variable name");
+    }
+
+    Value *BinaryExprAST::Codegen() {
+      Value *L = LHS->Codegen();
+      Value *R = RHS->Codegen();
+      if (L == 0 || R == 0) return 0;
+
+      switch (Op) {
+      case '+': return Builder.CreateFAdd(L, R, "addtmp");
+      case '-': return Builder.CreateFSub(L, R, "subtmp");
+      case '*': return Builder.CreateFMul(L, R, "multmp");
+      case '<':
+        L = Builder.CreateFCmpULT(L, R, "cmptmp");
+        // Convert bool 0/1 to double 0.0 or 1.0
+        return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                    "booltmp");
+      default: return ErrorV("invalid binary operator");
+      }
+    }
+
+    Value *CallExprAST::Codegen() {
+      // Look up the name in the global module table.
+      Function *CalleeF = TheModule->getFunction(Callee);
+      if (CalleeF == 0)
+        return ErrorV("Unknown function referenced");
+
+      // If argument mismatch error.
+      if (CalleeF->arg_size() != Args.size())
+        return ErrorV("Incorrect # arguments passed");
+
+      std::vector<Value*> ArgsV;
+      for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+        ArgsV.push_back(Args[i]->Codegen());
+        if (ArgsV.back() == 0) return 0;
+      }
+
+      return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+    }
+
+    Value *IfExprAST::Codegen() {
+      Value *CondV = Cond->Codegen();
+      if (CondV == 0) return 0;
+
+      // Convert condition to a bool by comparing equal to 0.0.
+      CondV = Builder.CreateFCmpONE(CondV,
+                                  ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                    "ifcond");
+
+      Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+      // Create blocks for the then and else cases.  Insert the 'then' block at the
+      // end of the function.
+      BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+      BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+      BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+      Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+      // Emit then value.
+      Builder.SetInsertPoint(ThenBB);
+
+      Value *ThenV = Then->Codegen();
+      if (ThenV == 0) return 0;
+
+      Builder.CreateBr(MergeBB);
+      // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+      ThenBB = Builder.GetInsertBlock();
+
+      // Emit else block.
+      TheFunction->getBasicBlockList().push_back(ElseBB);
+      Builder.SetInsertPoint(ElseBB);
+
+      Value *ElseV = Else->Codegen();
+      if (ElseV == 0) return 0;
+
+      Builder.CreateBr(MergeBB);
+      // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+      ElseBB = Builder.GetInsertBlock();
+
+      // Emit merge block.
+      TheFunction->getBasicBlockList().push_back(MergeBB);
+      Builder.SetInsertPoint(MergeBB);
+      PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
+                                      "iftmp");
+
+      PN->addIncoming(ThenV, ThenBB);
+      PN->addIncoming(ElseV, ElseBB);
+      return PN;
+    }
+
+    Value *ForExprAST::Codegen() {
+      // Output this as:
+      //   ...
+      //   start = startexpr
+      //   goto loop
+      // loop:
+      //   variable = phi [start, loopheader], [nextvariable, loopend]
+      //   ...
+      //   bodyexpr
+      //   ...
+      // loopend:
+      //   step = stepexpr
+      //   nextvariable = variable + step
+      //   endcond = endexpr
+      //   br endcond, loop, endloop
+      // outloop:
+
+      // Emit the start code first, without 'variable' in scope.
+      Value *StartVal = Start->Codegen();
+      if (StartVal == 0) return 0;
+
+      // Make the new basic block for the loop header, inserting after current
+      // block.
+      Function *TheFunction = Builder.GetInsertBlock()->getParent();
+      BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+      BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+      // Insert an explicit fall through from the current block to the LoopBB.
+      Builder.CreateBr(LoopBB);
+
+      // Start insertion in LoopBB.
+      Builder.SetInsertPoint(LoopBB);
+
+      // Start the PHI node with an entry for Start.
+      PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
+      Variable->addIncoming(StartVal, PreheaderBB);
+
+      // Within the loop, the variable is defined equal to the PHI node.  If it
+      // shadows an existing variable, we have to restore it, so save it now.
+      Value *OldVal = NamedValues[VarName];
+      NamedValues[VarName] = Variable;
+
+      // Emit the body of the loop.  This, like any other expr, can change the
+      // current BB.  Note that we ignore the value computed by the body, but don't
+      // allow an error.
+      if (Body->Codegen() == 0)
+        return 0;
+
+      // Emit the step value.
+      Value *StepVal;
+      if (Step) {
+        StepVal = Step->Codegen();
+        if (StepVal == 0) return 0;
+      } else {
+        // If not specified, use 1.0.
+        StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+      }
+
+      Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
+
+      // Compute the end condition.
+      Value *EndCond = End->Codegen();
+      if (EndCond == 0) return EndCond;
+
+      // Convert condition to a bool by comparing equal to 0.0.
+      EndCond = Builder.CreateFCmpONE(EndCond,
+                                  ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                      "loopcond");
+
+      // Create the "after loop" block and insert it.
+      BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+      BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+      // Insert the conditional branch into the end of LoopEndBB.
+      Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+      // Any new code will be inserted in AfterBB.
+      Builder.SetInsertPoint(AfterBB);
+
+      // Add a new entry to the PHI node for the backedge.
+      Variable->addIncoming(NextVar, LoopEndBB);
+
+      // Restore the unshadowed variable.
+      if (OldVal)
+        NamedValues[VarName] = OldVal;
+      else
+        NamedValues.erase(VarName);
+
+
+      // for expr always returns 0.0.
+      return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+    }
+
+    Function *PrototypeAST::Codegen() {
+      // Make the function type:  double(double,double) etc.
+      std::vector<Type*> Doubles(Args.size(),
+                                 Type::getDoubleTy(getGlobalContext()));
+      FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                           Doubles, false);
+
+      Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+      // If F conflicted, there was already something named 'Name'.  If it has a
+      // body, don't allow redefinition or reextern.
+      if (F->getName() != Name) {
+        // Delete the one we just made and get the existing one.
+        F->eraseFromParent();
+        F = TheModule->getFunction(Name);
+
+        // If F already has a body, reject this.
+        if (!F->empty()) {
+          ErrorF("redefinition of function");
+          return 0;
+        }
+
+        // If F took a different number of args, reject.
+        if (F->arg_size() != Args.size()) {
+          ErrorF("redefinition of function with different # args");
+          return 0;
+        }
+      }
+
+      // Set names for all arguments.
+      unsigned Idx = 0;
+      for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+           ++AI, ++Idx) {
+        AI->setName(Args[Idx]);
+
+        // Add arguments to variable symbol table.
+        NamedValues[Args[Idx]] = AI;
+      }
+
+      return F;
+    }
+
+    Function *FunctionAST::Codegen() {
+      NamedValues.clear();
+
+      Function *TheFunction = Proto->Codegen();
+      if (TheFunction == 0)
+        return 0;
+
+      // Create a new basic block to start insertion into.
+      BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+      Builder.SetInsertPoint(BB);
+
+      if (Value *RetVal = Body->Codegen()) {
+        // Finish off the function.
+        Builder.CreateRet(RetVal);
+
+        // Validate the generated code, checking for consistency.
+        verifyFunction(*TheFunction);
+
+        // Optimize the function.
+        TheFPM->run(*TheFunction);
+
+        return TheFunction;
+      }
+
+      // Error reading body, remove function.
+      TheFunction->eraseFromParent();
+      return 0;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Top-Level parsing and JIT Driver
+    //===----------------------------------------------------------------------===//
+
+    static ExecutionEngine *TheExecutionEngine;
+
+    static void HandleDefinition() {
+      if (FunctionAST *F = ParseDefinition()) {
+        if (Function *LF = F->Codegen()) {
+          fprintf(stderr, "Read function definition:");
+          LF->dump();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleExtern() {
+      if (PrototypeAST *P = ParseExtern()) {
+        if (Function *F = P->Codegen()) {
+          fprintf(stderr, "Read extern: ");
+          F->dump();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleTopLevelExpression() {
+      // Evaluate a top-level expression into an anonymous function.
+      if (FunctionAST *F = ParseTopLevelExpr()) {
+        if (Function *LF = F->Codegen()) {
+          // JIT the function, returning a function pointer.
+          void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+          // Cast it to the right type (takes no arguments, returns a double) so we
+          // can call it as a native function.
+          double (*FP)() = (double (*)())(intptr_t)FPtr;
+          fprintf(stderr, "Evaluated to %f\n", FP());
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    /// top ::= definition | external | expression | ';'
+    static void MainLoop() {
+      while (1) {
+        fprintf(stderr, "ready> ");
+        switch (CurTok) {
+        case tok_eof:    return;
+        case ';':        getNextToken(); break;  // ignore top-level semicolons.
+        case tok_def:    HandleDefinition(); break;
+        case tok_extern: HandleExtern(); break;
+        default:         HandleTopLevelExpression(); break;
+        }
+      }
+    }
+
+    //===----------------------------------------------------------------------===//
+    // "Library" functions that can be "extern'd" from user code.
+    //===----------------------------------------------------------------------===//
+
+    /// putchard - putchar that takes a double and returns 0.
+    extern "C"
+    double putchard(double X) {
+      putchar((char)X);
+      return 0;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Main driver code.
+    //===----------------------------------------------------------------------===//
+
+    int main() {
+      InitializeNativeTarget();
+      LLVMContext &Context = getGlobalContext();
+
+      // Install standard binary operators.
+      // 1 is lowest precedence.
+      BinopPrecedence['<'] = 10;
+      BinopPrecedence['+'] = 20;
+      BinopPrecedence['-'] = 20;
+      BinopPrecedence['*'] = 40;  // highest.
+
+      // Prime the first token.
+      fprintf(stderr, "ready> ");
+      getNextToken();
+
+      // Make the module, which holds all the code.
+      TheModule = new Module("my cool jit", Context);
+
+      // Create the JIT.  This takes ownership of the module.
+      std::string ErrStr;
+      TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+      if (!TheExecutionEngine) {
+        fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+        exit(1);
+      }
+
+      FunctionPassManager OurFPM(TheModule);
+
+      // Set up the optimizer pipeline.  Start with registering info about how the
+      // target lays out data structures.
+      OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
+      // Provide basic AliasAnalysis support for GVN.
+      OurFPM.add(createBasicAliasAnalysisPass());
+      // Do simple "peephole" optimizations and bit-twiddling optzns.
+      OurFPM.add(createInstructionCombiningPass());
+      // Reassociate expressions.
+      OurFPM.add(createReassociatePass());
+      // Eliminate Common SubExpressions.
+      OurFPM.add(createGVNPass());
+      // Simplify the control flow graph (deleting unreachable blocks, etc).
+      OurFPM.add(createCFGSimplificationPass());
+
+      OurFPM.doInitialization();
+
+      // Set the global so the code gen can use this.
+      TheFPM = &OurFPM;
+
+      // Run the main "interpreter loop" now.
+      MainLoop();
+
+      TheFPM = 0;
+
+      // Print out all of the generated code.
+      TheModule->dump();
+
+      return 0;
+    }
+
+`Next: Extending the language: user-defined operators <LangImpl6.html>`_
+
diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html
deleted file mode 100644
index bf502e7da9..0000000000
--- a/docs/tutorial/LangImpl6.html
+++ /dev/null
@@ -1,1829 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Extending the Language: User-defined Operators</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Extending the Language: User-defined Operators</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 6
-  <ol>
-    <li><a href="#intro">Chapter 6 Introduction</a></li>
-    <li><a href="#idea">User-defined Operators: the Idea</a></li>
-    <li><a href="#binary">User-defined Binary Operators</a></li>
-    <li><a href="#unary">User-defined Unary Operators</a></li>
-    <li><a href="#example">Kicking the Tires</a></li>
-    <li><a href="#code">Full Code Listing</a></li>
-  </ol>
-</li>
-<li><a href="LangImpl7.html">Chapter 7</a>: Extending the Language: Mutable
-Variables / SSA Construction</li>
-</ul>
-
-<div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 6 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 6 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial.  At this point in our tutorial, we now have a fully
-functional language that is fairly minimal, but also useful.  There
-is still one big problem with it, however. Our language doesn't have many 
-useful operators (like division, logical negation, or even any comparisons 
-besides less-than).</p>
-
-<p>This chapter of the tutorial takes a wild digression into adding user-defined
-operators to the simple and beautiful Kaleidoscope language. This digression now gives 
-us a simple and ugly language in some ways, but also a powerful one at the same time.
-One of the great things about creating your own language is that you get to
-decide what is good or bad.  In this tutorial we'll assume that it is okay to
-use this as a way to show some interesting parsing techniques.</p>
-
-<p>At the end of this tutorial, we'll run through an example Kaleidoscope 
-application that <a href="#example">renders the Mandelbrot set</a>.  This gives 
-an example of what you can build with Kaleidoscope and its feature set.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="idea">User-defined Operators: the Idea</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The "operator overloading" that we will add to Kaleidoscope is more general than
-languages like C++.  In C++, you are only allowed to redefine existing
-operators: you can't programatically change the grammar, introduce new
-operators, change precedence levels, etc.  In this chapter, we will add this
-capability to Kaleidoscope, which will let the user round out the set of
-operators that are supported.</p>
-
-<p>The point of going into user-defined operators in a tutorial like this is to
-show the power and flexibility of using a hand-written parser.  Thus far, the parser
-we have been implementing uses recursive descent for most parts of the grammar and 
-operator precedence parsing for the expressions.  See <a 
-href="LangImpl2.html">Chapter 2</a> for details.  Without using operator
-precedence parsing, it would be very difficult to allow the programmer to
-introduce new operators into the grammar: the grammar is dynamically extensible
-as the JIT runs.</p>
-
-<p>The two specific features we'll add are programmable unary operators (right
-now, Kaleidoscope has no unary operators at all) as well as binary operators.
-An example of this is:</p>
-
-<div class="doc_code">
-<pre>
-# Logical unary not.
-def unary!(v)
-  if v then
-    0
-  else
-    1;
-
-# Define &gt; with the same precedence as &lt;.
-def binary&gt; 10 (LHS RHS)
-  RHS &lt; LHS;
-
-# Binary "logical or", (note that it does not "short circuit")
-def binary| 5 (LHS RHS)
-  if LHS then
-    1
-  else if RHS then
-    1
-  else
-    0;
-
-# Define = with slightly lower precedence than relationals.
-def binary= 9 (LHS RHS)
-  !(LHS &lt; RHS | LHS &gt; RHS);
-</pre>
-</div>
-
-<p>Many languages aspire to being able to implement their standard runtime
-library in the language itself.  In Kaleidoscope, we can implement significant
-parts of the language in the library!</p>
-
-<p>We will break down implementation of these features into two parts:
-implementing support for user-defined binary operators and adding unary
-operators.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="binary">User-defined Binary Operators</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Adding support for user-defined binary operators is pretty simple with our
-current framework.  We'll first add support for the unary/binary keywords:</p>
-
-<div class="doc_code">
-<pre>
-enum Token {
-  ...
-  <b>// operators
-  tok_binary = -11, tok_unary = -12</b>
-};
-...
-static int gettok() {
-...
-    if (IdentifierStr == "for") return tok_for;
-    if (IdentifierStr == "in") return tok_in;
-    <b>if (IdentifierStr == "binary") return tok_binary;
-    if (IdentifierStr == "unary") return tok_unary;</b>
-    return tok_identifier;
-</pre>
-</div>
-
-<p>This just adds lexer support for the unary and binary keywords, like we
-did in <a href="LangImpl5.html#iflexer">previous chapters</a>.  One nice thing
-about our current AST, is that we represent binary operators with full generalisation
-by using their ASCII code as the opcode.  For our extended operators, we'll use this
-same representation, so we don't need any new AST or parser support.</p>
-
-<p>On the other hand, we have to be able to represent the definitions of these
-new operators, in the "def binary| 5" part of the function definition.  In our
-grammar so far, the "name" for the function definition is parsed as the
-"prototype" production and into the <tt>PrototypeAST</tt> AST node.  To
-represent our new user-defined operators as prototypes, we have to extend
-the  <tt>PrototypeAST</tt> AST node like this:</p>
-
-<div class="doc_code">
-<pre>
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its argument names as well as if it is an operator.
-class PrototypeAST {
-  std::string Name;
-  std::vector&lt;std::string&gt; Args;
-  <b>bool isOperator;
-  unsigned Precedence;  // Precedence if a binary op.</b>
-public:
-  PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args,
-               <b>bool isoperator = false, unsigned prec = 0</b>)
-  : Name(name), Args(args), <b>isOperator(isoperator), Precedence(prec)</b> {}
-  
-  <b>bool isUnaryOp() const { return isOperator &amp;&amp; Args.size() == 1; }
-  bool isBinaryOp() const { return isOperator &amp;&amp; Args.size() == 2; }
-  
-  char getOperatorName() const {
-    assert(isUnaryOp() || isBinaryOp());
-    return Name[Name.size()-1];
-  }
-  
-  unsigned getBinaryPrecedence() const { return Precedence; }</b>
-  
-  Function *Codegen();
-};
-</pre>
-</div>
-
-<p>Basically, in addition to knowing a name for the prototype, we now keep track
-of whether it was an operator, and if it was, what precedence level the operator
-is at.  The precedence is only used for binary operators (as you'll see below,
-it just doesn't apply for unary operators).  Now that we have a way to represent
-the prototype for a user-defined operator, we need to parse it:</p>
-
-<div class="doc_code">
-<pre>
-/// prototype
-///   ::= id '(' id* ')'
-<b>///   ::= binary LETTER number? (id, id)</b>
-static PrototypeAST *ParsePrototype() {
-  std::string FnName;
-  
-  <b>unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
-  unsigned BinaryPrecedence = 30;</b>
-  
-  switch (CurTok) {
-  default:
-    return ErrorP("Expected function name in prototype");
-  case tok_identifier:
-    FnName = IdentifierStr;
-    Kind = 0;
-    getNextToken();
-    break;
-  <b>case tok_binary:
-    getNextToken();
-    if (!isascii(CurTok))
-      return ErrorP("Expected binary operator");
-    FnName = "binary";
-    FnName += (char)CurTok;
-    Kind = 2;
-    getNextToken();
-    
-    // Read the precedence if present.
-    if (CurTok == tok_number) {
-      if (NumVal &lt; 1 || NumVal &gt; 100)
-        return ErrorP("Invalid precedecnce: must be 1..100");
-      BinaryPrecedence = (unsigned)NumVal;
-      getNextToken();
-    }
-    break;</b>
-  }
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  std::vector&lt;std::string&gt; ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  <b>// Verify right number of names for operator.
-  if (Kind &amp;&amp; ArgNames.size() != Kind)
-    return ErrorP("Invalid number of operands for operator");
-  
-  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);</b>
-}
-</pre>
-</div>
-
-<p>This is all fairly straightforward parsing code, and we have already seen
-a lot of similar code in the past.  One interesting part about the code above is 
-the couple lines that set up <tt>FnName</tt> for binary operators.  This builds names 
-like "binary@" for a newly defined "@" operator.  This then takes advantage of the 
-fact that symbol names in the LLVM symbol table are allowed to have any character in
-them, including embedded nul characters.</p>
-
-<p>The next interesting thing to add, is codegen support for these binary operators.
-Given our current structure, this is a simple addition of a default case for our
-existing binary operator node:</p>
-
-<div class="doc_code">
-<pre>
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS-&gt;Codegen();
-  Value *R = RHS-&gt;Codegen();
-  if (L == 0 || R == 0) return 0;
-  
-  switch (Op) {
-  case '+': return Builder.CreateFAdd(L, R, "addtmp");
-  case '-': return Builder.CreateFSub(L, R, "subtmp");
-  case '*': return Builder.CreateFMul(L, R, "multmp");
-  case '&lt;':
-    L = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  <b>default: break;</b>
-  }
-  
-  <b>// If it wasn't a builtin binary operator, it must be a user defined one. Emit
-  // a call to it.
-  Function *F = TheModule-&gt;getFunction(std::string("binary")+Op);
-  assert(F &amp;&amp; "binary operator not found!");
-  
-  Value *Ops[2] = { L, R };
-  return Builder.CreateCall(F, Ops, "binop");</b>
-}
-
-</pre>
-</div>
-
-<p>As you can see above, the new code is actually really simple.  It just does
-a lookup for the appropriate operator in the symbol table and generates a 
-function call to it.  Since user-defined operators are just built as normal
-functions (because the "prototype" boils down to a function with the right
-name) everything falls into place.</p>
-
-<p>The final piece of code we are missing, is a bit of top-level magic:</p>
-
-<div class="doc_code">
-<pre>
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto->Codegen();
-  if (TheFunction == 0)
-    return 0;
-  
-  <b>// If this is an operator, install it.
-  if (Proto-&gt;isBinaryOp())
-    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();</b>
-  
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  if (Value *RetVal = Body-&gt;Codegen()) {
-    ...
-</pre>
-</div>
-
-<p>Basically, before codegening a function, if it is a user-defined operator, we
-register it in the precedence table.  This allows the binary operator parsing
-logic we already have in place to handle it.  Since we are working on a fully-general operator precedence parser, this is all we need to do to "extend the grammar".</p>
-
-<p>Now we have useful user-defined binary operators.  This builds a lot
-on the previous framework we built for other operators.  Adding unary operators
-is a bit more challenging, because we don't have any framework for it yet - lets
-see what it takes.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="unary">User-defined Unary Operators</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Since we don't currently support unary operators in the Kaleidoscope
-language, we'll need to add everything to support them.  Above, we added simple
-support for the 'unary' keyword to the lexer.  In addition to that, we need an
-AST node:</p>
-
-<div class="doc_code">
-<pre>
-/// UnaryExprAST - Expression class for a unary operator.
-class UnaryExprAST : public ExprAST {
-  char Opcode;
-  ExprAST *Operand;
-public:
-  UnaryExprAST(char opcode, ExprAST *operand) 
-    : Opcode(opcode), Operand(operand) {}
-  virtual Value *Codegen();
-};
-</pre>
-</div>
-
-<p>This AST node is very simple and obvious by now.  It directly mirrors the
-binary operator AST node, except that it only has one child.  With this, we
-need to add the parsing logic.  Parsing a unary operator is pretty simple: we'll
-add a new function to do it:</p>
-
-<div class="doc_code">
-<pre>
-/// unary
-///   ::= primary
-///   ::= '!' unary
-static ExprAST *ParseUnary() {
-  // If the current token is not an operator, it must be a primary expr.
-  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
-    return ParsePrimary();
-  
-  // If this is a unary operator, read it.
-  int Opc = CurTok;
-  getNextToken();
-  if (ExprAST *Operand = ParseUnary())
-    return new UnaryExprAST(Opc, Operand);
-  return 0;
-}
-</pre>
-</div>
-
-<p>The grammar we add is pretty straightforward here.  If we see a unary
-operator when parsing a primary operator, we eat the operator as a prefix and
-parse the remaining piece as another unary operator.  This allows us to handle
-multiple unary operators (e.g. "!!x").  Note that unary operators can't have 
-ambiguous parses like binary operators can, so there is no need for precedence
-information.</p>
-
-<p>The problem with this function, is that we need to call ParseUnary from somewhere.
-To do this, we change previous callers of ParsePrimary to call ParseUnary
-instead:</p>
-
-<div class="doc_code">
-<pre>
-/// binoprhs
-///   ::= ('+' unary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  ...
-    <b>// Parse the unary expression after the binary operator.
-    ExprAST *RHS = ParseUnary();
-    if (!RHS) return 0;</b>
-  ...
-}
-/// expression
-///   ::= unary binoprhs
-///
-static ExprAST *ParseExpression() {
-  <b>ExprAST *LHS = ParseUnary();</b>
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-</pre>
-</div>
-
-<p>With these two simple changes, we are now able to parse unary operators and build the
-AST for them.  Next up, we need to add parser support for prototypes, to parse
-the unary operator prototype.  We extend the binary operator code above 
-with:</p>
-
-<div class="doc_code">
-<pre>
-/// prototype
-///   ::= id '(' id* ')'
-///   ::= binary LETTER number? (id, id)
-<b>///   ::= unary LETTER (id)</b>
-static PrototypeAST *ParsePrototype() {
-  std::string FnName;
-  
-  unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
-  unsigned BinaryPrecedence = 30;
-  
-  switch (CurTok) {
-  default:
-    return ErrorP("Expected function name in prototype");
-  case tok_identifier:
-    FnName = IdentifierStr;
-    Kind = 0;
-    getNextToken();
-    break;
-  <b>case tok_unary:
-    getNextToken();
-    if (!isascii(CurTok))
-      return ErrorP("Expected unary operator");
-    FnName = "unary";
-    FnName += (char)CurTok;
-    Kind = 1;
-    getNextToken();
-    break;</b>
-  case tok_binary:
-    ...
-</pre>
-</div>
-
-<p>As with binary operators, we name unary operators with a name that includes
-the operator character.  This assists us at code generation time.  Speaking of,
-the final piece we need to add is codegen support for unary operators.  It looks
-like this:</p>
-
-<div class="doc_code">
-<pre>
-Value *UnaryExprAST::Codegen() {
-  Value *OperandV = Operand->Codegen();
-  if (OperandV == 0) return 0;
-  
-  Function *F = TheModule->getFunction(std::string("unary")+Opcode);
-  if (F == 0)
-    return ErrorV("Unknown unary operator");
-  
-  return Builder.CreateCall(F, OperandV, "unop");
-}
-</pre>
-</div>
-
-<p>This code is similar to, but simpler than, the code for binary operators.  It
-is simpler primarily because it doesn't need to handle any predefined operators.
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="example">Kicking the Tires</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>It is somewhat hard to believe, but with a few simple extensions we've
-covered in the last chapters, we have grown a real-ish language.  With this, we 
-can do a lot of interesting things, including I/O, math, and a bunch of other
-things.  For example, we can now add a nice sequencing operator (printd is
-defined to print out the specified value and a newline):</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>extern printd(x);</b>
-Read extern:
-declare double @printd(double)
-
-ready&gt; <b>def binary : 1 (x y) 0;  # Low-precedence operator that ignores operands.</b>
-..
-ready&gt; <b>printd(123) : printd(456) : printd(789);</b>
-123.000000
-456.000000
-789.000000
-Evaluated to 0.000000
-</pre>
-</div>
-
-<p>We can also define a bunch of other "primitive" operations, such as:</p>
-
-<div class="doc_code">
-<pre>
-# Logical unary not.
-def unary!(v)
-  if v then
-    0
-  else
-    1;
-    
-# Unary negate.
-def unary-(v)
-  0-v;
-
-# Define &gt; with the same precedence as &lt;.
-def binary&gt; 10 (LHS RHS)
-  RHS &lt; LHS;
-
-# Binary logical or, which does not short circuit. 
-def binary| 5 (LHS RHS)
-  if LHS then
-    1
-  else if RHS then
-    1
-  else
-    0;
-
-# Binary logical and, which does not short circuit. 
-def binary&amp; 6 (LHS RHS)
-  if !LHS then
-    0
-  else
-    !!RHS;
-
-# Define = with slightly lower precedence than relationals.
-def binary = 9 (LHS RHS)
-  !(LHS &lt; RHS | LHS &gt; RHS);
-
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-</pre>
-</div>
-
-
-<p>Given the previous if/then/else support, we can also define interesting
-functions for I/O.  For example, the following prints out a character whose
-"density" reflects the value passed in: the lower the value, the denser the
-character:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt;
-<b>
-extern putchard(char)
-def printdensity(d)
-  if d &gt; 8 then
-    putchard(32)  # ' '
-  else if d &gt; 4 then
-    putchard(46)  # '.'
-  else if d &gt; 2 then
-    putchard(43)  # '+'
-  else
-    putchard(42); # '*'</b>
-...
-ready&gt; <b>printdensity(1): printdensity(2): printdensity(3):
-       printdensity(4): printdensity(5): printdensity(9):
-       putchard(10);</b>
-**++.
-Evaluated to 0.000000
-</pre>
-</div>
-
-<p>Based on these simple primitive operations, we can start to define more
-interesting things.  For example, here's a little function that solves for the
-number of iterations it takes a function in the complex plane to
-converge:</p>
-
-<div class="doc_code">
-<pre>
-# Determine whether the specific location diverges.
-# Solve for z = z^2 + c in the complex plane.
-def mandleconverger(real imag iters creal cimag)
-  if iters &gt; 255 | (real*real + imag*imag &gt; 4) then
-    iters
-  else
-    mandleconverger(real*real - imag*imag + creal,
-                    2*real*imag + cimag,
-                    iters+1, creal, cimag);
-
-# Return the number of iterations required for the iteration to escape
-def mandleconverge(real imag)
-  mandleconverger(real, imag, 0, real, imag);
-</pre>
-</div>
-
-<p>This "<code>z = z<sup>2</sup> + c</code>" function is a beautiful little
-creature that is the basis for computation of
-the <a href="http://en.wikipedia.org/wiki/Mandelbrot_set">Mandelbrot Set</a>.
-Our <tt>mandelconverge</tt> function returns the number of iterations that it
-takes for a complex orbit to escape, saturating to 255.  This is not a very
-useful function by itself, but if you plot its value over a two-dimensional
-plane, you can see the Mandelbrot set.  Given that we are limited to using
-putchard here, our amazing graphical output is limited, but we can whip together
-something using the density plotter above:</p>
-
-<div class="doc_code">
-<pre>
-# Compute and plot the mandlebrot set with the specified 2 dimensional range
-# info.
-def mandelhelp(xmin xmax xstep   ymin ymax ystep)
-  for y = ymin, y &lt; ymax, ystep in (
-    (for x = xmin, x &lt; xmax, xstep in
-       printdensity(mandleconverge(x,y)))
-    : putchard(10)
-  )
- 
-# mandel - This is a convenient helper function for plotting the mandelbrot set
-# from the specified position with the specified Magnification.
-def mandel(realstart imagstart realmag imagmag) 
-  mandelhelp(realstart, realstart+realmag*78, realmag,
-             imagstart, imagstart+imagmag*40, imagmag);
-</pre>
-</div>
-
-<p>Given this, we can try plotting out the mandlebrot set!  Lets try it out:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>mandel(-2.3, -1.3, 0.05, 0.07);</b>
-*******************************+++++++++++*************************************
-*************************+++++++++++++++++++++++*******************************
-**********************+++++++++++++++++++++++++++++****************************
-*******************+++++++++++++++++++++.. ...++++++++*************************
-*****************++++++++++++++++++++++.... ...+++++++++***********************
-***************+++++++++++++++++++++++.....   ...+++++++++*********************
-**************+++++++++++++++++++++++....     ....+++++++++********************
-*************++++++++++++++++++++++......      .....++++++++*******************
-************+++++++++++++++++++++.......       .......+++++++******************
-***********+++++++++++++++++++....                ... .+++++++*****************
-**********+++++++++++++++++.......                     .+++++++****************
-*********++++++++++++++...........                    ...+++++++***************
-********++++++++++++............                      ...++++++++**************
-********++++++++++... ..........                        .++++++++**************
-*******+++++++++.....                                   .+++++++++*************
-*******++++++++......                                  ..+++++++++*************
-*******++++++.......                                   ..+++++++++*************
-*******+++++......                                     ..+++++++++*************
-*******.... ....                                      ...+++++++++*************
-*******.... .                                         ...+++++++++*************
-*******+++++......                                    ...+++++++++*************
-*******++++++.......                                   ..+++++++++*************
-*******++++++++......                                   .+++++++++*************
-*******+++++++++.....                                  ..+++++++++*************
-********++++++++++... ..........                        .++++++++**************
-********++++++++++++............                      ...++++++++**************
-*********++++++++++++++..........                     ...+++++++***************
-**********++++++++++++++++........                     .+++++++****************
-**********++++++++++++++++++++....                ... ..+++++++****************
-***********++++++++++++++++++++++.......       .......++++++++*****************
-************+++++++++++++++++++++++......      ......++++++++******************
-**************+++++++++++++++++++++++....      ....++++++++********************
-***************+++++++++++++++++++++++.....   ...+++++++++*********************
-*****************++++++++++++++++++++++....  ...++++++++***********************
-*******************+++++++++++++++++++++......++++++++*************************
-*********************++++++++++++++++++++++.++++++++***************************
-*************************+++++++++++++++++++++++*******************************
-******************************+++++++++++++************************************
-*******************************************************************************
-*******************************************************************************
-*******************************************************************************
-Evaluated to 0.000000
-ready&gt; <b>mandel(-2, -1, 0.02, 0.04);</b>
-**************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
-***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-*********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
-*******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
-*****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
-***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
-**************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
-************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
-***********++++++++++++++++++++++++++++++++++++++++++++++++++........        . 
-**********++++++++++++++++++++++++++++++++++++++++++++++.............          
-********+++++++++++++++++++++++++++++++++++++++++++..................          
-*******+++++++++++++++++++++++++++++++++++++++.......................          
-******+++++++++++++++++++++++++++++++++++...........................           
-*****++++++++++++++++++++++++++++++++............................              
-*****++++++++++++++++++++++++++++...............................               
-****++++++++++++++++++++++++++......   .........................               
-***++++++++++++++++++++++++.........     ......    ...........                 
-***++++++++++++++++++++++............                                          
-**+++++++++++++++++++++..............                                          
-**+++++++++++++++++++................                                          
-*++++++++++++++++++.................                                           
-*++++++++++++++++............ ...                                              
-*++++++++++++++..............                                                  
-*+++....++++................                                                   
-*..........  ...........                                                       
-*                                                                              
-*..........  ...........                                                       
-*+++....++++................                                                   
-*++++++++++++++..............                                                  
-*++++++++++++++++............ ...                                              
-*++++++++++++++++++.................                                           
-**+++++++++++++++++++................                                          
-**+++++++++++++++++++++..............                                          
-***++++++++++++++++++++++............                                          
-***++++++++++++++++++++++++.........     ......    ...........                 
-****++++++++++++++++++++++++++......   .........................               
-*****++++++++++++++++++++++++++++...............................               
-*****++++++++++++++++++++++++++++++++............................              
-******+++++++++++++++++++++++++++++++++++...........................           
-*******+++++++++++++++++++++++++++++++++++++++.......................          
-********+++++++++++++++++++++++++++++++++++++++++++..................          
-Evaluated to 0.000000
-ready&gt; <b>mandel(-0.9, -1.4, 0.02, 0.03);</b>
-*******************************************************************************
-*******************************************************************************
-*******************************************************************************
-**********+++++++++++++++++++++************************************************
-*+++++++++++++++++++++++++++++++++++++++***************************************
-+++++++++++++++++++++++++++++++++++++++++++++**********************************
-++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
-++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
-+++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
-+++++++++++++++++++++++++++++++....   ......+++++++++++++++++++****************
-+++++++++++++++++++++++++++++.......  ........+++++++++++++++++++**************
-++++++++++++++++++++++++++++........   ........++++++++++++++++++++************
-+++++++++++++++++++++++++++.........     ..  ...+++++++++++++++++++++**********
-++++++++++++++++++++++++++...........        ....++++++++++++++++++++++********
-++++++++++++++++++++++++.............       .......++++++++++++++++++++++******
-+++++++++++++++++++++++.............        ........+++++++++++++++++++++++****
-++++++++++++++++++++++...........           ..........++++++++++++++++++++++***
-++++++++++++++++++++...........                .........++++++++++++++++++++++*
-++++++++++++++++++............                  ...........++++++++++++++++++++
-++++++++++++++++...............                 .............++++++++++++++++++
-++++++++++++++.................                 ...............++++++++++++++++
-++++++++++++..................                  .................++++++++++++++
-+++++++++..................                      .................+++++++++++++
-++++++........        .                               .........  ..++++++++++++
-++............                                         ......    ....++++++++++
-..............                                                    ...++++++++++
-..............                                                    ....+++++++++
-..............                                                    .....++++++++
-.............                                                    ......++++++++
-...........                                                     .......++++++++
-.........                                                       ........+++++++
-.........                                                       ........+++++++
-.........                                                           ....+++++++
-........                                                             ...+++++++
-.......                                                              ...+++++++
-                                                                    ....+++++++
-                                                                   .....+++++++
-                                                                    ....+++++++
-                                                                    ....+++++++
-                                                                    ....+++++++
-Evaluated to 0.000000
-ready&gt; <b>^D</b>
-</pre>
-</div>
-
-<p>At this point, you may be starting to realize that Kaleidoscope is a real
-and powerful language.  It may not be self-similar :), but it can be used to
-plot things that are!</p>
-
-<p>With this, we conclude the "adding user-defined operators" chapter of the
-tutorial.  We have successfully augmented our language, adding the ability to extend the
-language in the library, and we have shown how this can be used to build a simple but
-interesting end-user application in Kaleidoscope.  At this point, Kaleidoscope
-can build a variety of applications that are functional and can call functions
-with side-effects, but it can't actually define and mutate a variable itself.
-</p>
-
-<p>Strikingly, variable mutation is an important feature of some
-languages, and it is not at all obvious how to <a href="LangImpl7.html">add
-support for mutable variables</a> without having to add an "SSA construction"
-phase to your front-end.  In the next chapter, we will describe how you can
-add variable mutation without building SSA in your front-end.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-if/then/else and for expressions..  To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
-# Run
-./toy
-</pre>
-</div>
-
-<p>On some platforms, you will need to specify -rdynamic or -Wl,--export-dynamic
-when linking.  This ensures that symbols defined in the main executable are
-exported to the dynamic linker and so are available for symbol resolution at
-run time.  This is not needed if you compile your support code into a shared
-library, although doing that will cause problems on Windows.</p>
-
-<p>Here is the code:</p>
-
-<div class="doc_code">
-<pre>
-#include "llvm/DerivedTypes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/TargetSelect.h"
-#include &lt;cstdio&gt;
-#include &lt;string&gt;
-#include &lt;map&gt;
-#include &lt;vector&gt;
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5,
-  
-  // control
-  tok_if = -6, tok_then = -7, tok_else = -8,
-  tok_for = -9, tok_in = -10,
-  
-  // operators
-  tok_binary = -11, tok_unary = -12
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    if (IdentifierStr == "if") return tok_if;
-    if (IdentifierStr == "then") return tok_then;
-    if (IdentifierStr == "else") return tok_else;
-    if (IdentifierStr == "for") return tok_for;
-    if (IdentifierStr == "in") return tok_in;
-    if (IdentifierStr == "binary") return tok_binary;
-    if (IdentifierStr == "unary") return tok_unary;
-    return tok_identifier;
-  }
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-  
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-  virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &amp;name) : Name(name) {}
-  virtual Value *Codegen();
-};
-
-/// UnaryExprAST - Expression class for a unary operator.
-class UnaryExprAST : public ExprAST {
-  char Opcode;
-  ExprAST *Operand;
-public:
-  UnaryExprAST(char opcode, ExprAST *operand) 
-    : Opcode(opcode), Operand(operand) {}
-  virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-  virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector&lt;ExprAST*&gt; Args;
-public:
-  CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
-    : Callee(callee), Args(args) {}
-  virtual Value *Codegen();
-};
-
-/// IfExprAST - Expression class for if/then/else.
-class IfExprAST : public ExprAST {
-  ExprAST *Cond, *Then, *Else;
-public:
-  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
-  : Cond(cond), Then(then), Else(_else) {}
-  virtual Value *Codegen();
-};
-
-/// ForExprAST - Expression class for for/in.
-class ForExprAST : public ExprAST {
-  std::string VarName;
-  ExprAST *Start, *End, *Step, *Body;
-public:
-  ForExprAST(const std::string &amp;varname, ExprAST *start, ExprAST *end,
-             ExprAST *step, ExprAST *body)
-    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
-  virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes), as well as if it is an operator.
-class PrototypeAST {
-  std::string Name;
-  std::vector&lt;std::string&gt; Args;
-  bool isOperator;
-  unsigned Precedence;  // Precedence if a binary op.
-public:
-  PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args,
-               bool isoperator = false, unsigned prec = 0)
-  : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
-  
-  bool isUnaryOp() const { return isOperator &amp;&amp; Args.size() == 1; }
-  bool isBinaryOp() const { return isOperator &amp;&amp; Args.size() == 2; }
-  
-  char getOperatorName() const {
-    assert(isUnaryOp() || isBinaryOp());
-    return Name[Name.size()-1];
-  }
-  
-  unsigned getBinaryPrecedence() const { return Precedence; }
-  
-  Function *Codegen();
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-  
-  Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-  
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec &lt;= 0) return -1;
-  return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector&lt;ExprAST*&gt; Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-/// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {
-  getNextToken();  // eat the if.
-  
-  // condition.
-  ExprAST *Cond = ParseExpression();
-  if (!Cond) return 0;
-  
-  if (CurTok != tok_then)
-    return Error("expected then");
-  getNextToken();  // eat the then
-  
-  ExprAST *Then = ParseExpression();
-  if (Then == 0) return 0;
-  
-  if (CurTok != tok_else)
-    return Error("expected else");
-  
-  getNextToken();
-  
-  ExprAST *Else = ParseExpression();
-  if (!Else) return 0;
-  
-  return new IfExprAST(Cond, Then, Else);
-}
-
-/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {
-  getNextToken();  // eat the for.
-
-  if (CurTok != tok_identifier)
-    return Error("expected identifier after for");
-  
-  std::string IdName = IdentifierStr;
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '=')
-    return Error("expected '=' after for");
-  getNextToken();  // eat '='.
-  
-  
-  ExprAST *Start = ParseExpression();
-  if (Start == 0) return 0;
-  if (CurTok != ',')
-    return Error("expected ',' after for start value");
-  getNextToken();
-  
-  ExprAST *End = ParseExpression();
-  if (End == 0) return 0;
-  
-  // The step value is optional.
-  ExprAST *Step = 0;
-  if (CurTok == ',') {
-    getNextToken();
-    Step = ParseExpression();
-    if (Step == 0) return 0;
-  }
-  
-  if (CurTok != tok_in)
-    return Error("expected 'in' after for");
-  getNextToken();  // eat 'in'.
-  
-  ExprAST *Body = ParseExpression();
-  if (Body == 0) return 0;
-
-  return new ForExprAST(IdName, Start, End, Step, Body);
-}
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-///   ::= ifexpr
-///   ::= forexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  case tok_if:         return ParseIfExpr();
-  case tok_for:        return ParseForExpr();
-  }
-}
-
-/// unary
-///   ::= primary
-///   ::= '!' unary
-static ExprAST *ParseUnary() {
-  // If the current token is not an operator, it must be a primary expr.
-  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
-    return ParsePrimary();
-  
-  // If this is a unary operator, read it.
-  int Opc = CurTok;
-  getNextToken();
-  if (ExprAST *Operand = ParseUnary())
-    return new UnaryExprAST(Opc, Operand);
-  return 0;
-}
-
-/// binoprhs
-///   ::= ('+' unary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec &lt; ExprPrec)
-      return LHS;
-    
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the unary expression after the binary operator.
-    ExprAST *RHS = ParseUnary();
-    if (!RHS) return 0;
-    
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec &lt; NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }
-}
-
-/// expression
-///   ::= unary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParseUnary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-///   ::= id '(' id* ')'
-///   ::= binary LETTER number? (id, id)
-///   ::= unary LETTER (id)
-static PrototypeAST *ParsePrototype() {
-  std::string FnName;
-  
-  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
-  unsigned BinaryPrecedence = 30;
-  
-  switch (CurTok) {
-  default:
-    return ErrorP("Expected function name in prototype");
-  case tok_identifier:
-    FnName = IdentifierStr;
-    Kind = 0;
-    getNextToken();
-    break;
-  case tok_unary:
-    getNextToken();
-    if (!isascii(CurTok))
-      return ErrorP("Expected unary operator");
-    FnName = "unary";
-    FnName += (char)CurTok;
-    Kind = 1;
-    getNextToken();
-    break;
-  case tok_binary:
-    getNextToken();
-    if (!isascii(CurTok))
-      return ErrorP("Expected binary operator");
-    FnName = "binary";
-    FnName += (char)CurTok;
-    Kind = 2;
-    getNextToken();
-    
-    // Read the precedence if present.
-    if (CurTok == tok_number) {
-      if (NumVal &lt; 1 || NumVal &gt; 100)
-        return ErrorP("Invalid precedecnce: must be 1..100");
-      BinaryPrecedence = (unsigned)NumVal;
-      getNextToken();
-    }
-    break;
-  }
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  std::vector&lt;std::string&gt; ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  // Verify right number of names for operator.
-  if (Kind &amp;&amp; ArgNames.size() != Kind)
-    return ErrorP("Invalid number of operands for operator");
-  
-  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder&lt;&gt; Builder(getGlobalContext());
-static std::map&lt;std::string, Value*&gt; NamedValues;
-static FunctionPassManager *TheFPM;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-Value *NumberExprAST::Codegen() {
-  return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
-}
-
-Value *UnaryExprAST::Codegen() {
-  Value *OperandV = Operand-&gt;Codegen();
-  if (OperandV == 0) return 0;
-  
-  Function *F = TheModule-&gt;getFunction(std::string("unary")+Opcode);
-  if (F == 0)
-    return ErrorV("Unknown unary operator");
-  
-  return Builder.CreateCall(F, OperandV, "unop");
-}
-
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS-&gt;Codegen();
-  Value *R = RHS-&gt;Codegen();
-  if (L == 0 || R == 0) return 0;
-  
-  switch (Op) {
-  case '+': return Builder.CreateFAdd(L, R, "addtmp");
-  case '-': return Builder.CreateFSub(L, R, "subtmp");
-  case '*': return Builder.CreateFMul(L, R, "multmp");
-  case '&lt;':
-    L = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  default: break;
-  }
-  
-  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
-  // a call to it.
-  Function *F = TheModule-&gt;getFunction(std::string("binary")+Op);
-  assert(F &amp;&amp; "binary operator not found!");
-  
-  Value *Ops[2] = { L, R };
-  return Builder.CreateCall(F, Ops, "binop");
-}
-
-Value *CallExprAST::Codegen() {
-  // Look up the name in the global module table.
-  Function *CalleeF = TheModule-&gt;getFunction(Callee);
-  if (CalleeF == 0)
-    return ErrorV("Unknown function referenced");
-  
-  // If argument mismatch error.
-  if (CalleeF-&gt;arg_size() != Args.size())
-    return ErrorV("Incorrect # arguments passed");
-
-  std::vector&lt;Value*&gt; ArgsV;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]-&gt;Codegen());
-    if (ArgsV.back() == 0) return 0;
-  }
-  
-  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Value *IfExprAST::Codegen() {
-  Value *CondV = Cond-&gt;Codegen();
-  if (CondV == 0) return 0;
-  
-  // Convert condition to a bool by comparing equal to 0.0.
-  CondV = Builder.CreateFCmpONE(CondV, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                "ifcond");
-  
-  Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-  
-  // Create blocks for the then and else cases.  Insert the 'then' block at the
-  // end of the function.
-  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
-  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
-  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
-  
-  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
-  
-  // Emit then value.
-  Builder.SetInsertPoint(ThenBB);
-  
-  Value *ThenV = Then-&gt;Codegen();
-  if (ThenV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
-  ThenBB = Builder.GetInsertBlock();
-  
-  // Emit else block.
-  TheFunction-&gt;getBasicBlockList().push_back(ElseBB);
-  Builder.SetInsertPoint(ElseBB);
-  
-  Value *ElseV = Else-&gt;Codegen();
-  if (ElseV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
-  ElseBB = Builder.GetInsertBlock();
-  
-  // Emit merge block.
-  TheFunction-&gt;getBasicBlockList().push_back(MergeBB);
-  Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
-                                  "iftmp");
-  
-  PN-&gt;addIncoming(ThenV, ThenBB);
-  PN-&gt;addIncoming(ElseV, ElseBB);
-  return PN;
-}
-
-Value *ForExprAST::Codegen() {
-  // Output this as:
-  //   ...
-  //   start = startexpr
-  //   goto loop
-  // loop: 
-  //   variable = phi [start, loopheader], [nextvariable, loopend]
-  //   ...
-  //   bodyexpr
-  //   ...
-  // loopend:
-  //   step = stepexpr
-  //   nextvariable = variable + step
-  //   endcond = endexpr
-  //   br endcond, loop, endloop
-  // outloop:
-  
-  // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start-&gt;Codegen();
-  if (StartVal == 0) return 0;
-  
-  // Make the new basic block for the loop header, inserting after current
-  // block.
-  Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
-  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
-  
-  // Insert an explicit fall through from the current block to the LoopBB.
-  Builder.CreateBr(LoopBB);
-
-  // Start insertion in LoopBB.
-  Builder.SetInsertPoint(LoopBB);
-  
-  // Start the PHI node with an entry for Start.
-  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
-  Variable-&gt;addIncoming(StartVal, PreheaderBB);
-  
-  // Within the loop, the variable is defined equal to the PHI node.  If it
-  // shadows an existing variable, we have to restore it, so save it now.
-  Value *OldVal = NamedValues[VarName];
-  NamedValues[VarName] = Variable;
-  
-  // Emit the body of the loop.  This, like any other expr, can change the
-  // current BB.  Note that we ignore the value computed by the body, but don't
-  // allow an error.
-  if (Body-&gt;Codegen() == 0)
-    return 0;
-  
-  // Emit the step value.
-  Value *StepVal;
-  if (Step) {
-    StepVal = Step-&gt;Codegen();
-    if (StepVal == 0) return 0;
-  } else {
-    // If not specified, use 1.0.
-    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
-  }
-  
-  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
-
-  // Compute the end condition.
-  Value *EndCond = End-&gt;Codegen();
-  if (EndCond == 0) return EndCond;
-  
-  // Convert condition to a bool by comparing equal to 0.0.
-  EndCond = Builder.CreateFCmpONE(EndCond, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                  "loopcond");
-  
-  // Create the "after loop" block and insert it.
-  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
-  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
-  
-  // Insert the conditional branch into the end of LoopEndBB.
-  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
-  
-  // Any new code will be inserted in AfterBB.
-  Builder.SetInsertPoint(AfterBB);
-  
-  // Add a new entry to the PHI node for the backedge.
-  Variable-&gt;addIncoming(NextVar, LoopEndBB);
-  
-  // Restore the unshadowed variable.
-  if (OldVal)
-    NamedValues[VarName] = OldVal;
-  else
-    NamedValues.erase(VarName);
-
-  
-  // for expr always returns 0.0.
-  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
-}
-
-Function *PrototypeAST::Codegen() {
-  // Make the function type:  double(double,double) etc.
-  std::vector&lt;Type*&gt; Doubles(Args.size(),
-                             Type::getDoubleTy(getGlobalContext()));
-  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
-                                       Doubles, false);
-  
-  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-  
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F-&gt;getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F-&gt;eraseFromParent();
-    F = TheModule-&gt;getFunction(Name);
-    
-    // If F already has a body, reject this.
-    if (!F-&gt;empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-    
-    // If F took a different number of args, reject.
-    if (F-&gt;arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
-  
-  // Set names for all arguments.
-  unsigned Idx = 0;
-  for (Function::arg_iterator AI = F-&gt;arg_begin(); Idx != Args.size();
-       ++AI, ++Idx) {
-    AI-&gt;setName(Args[Idx]);
-    
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = AI;
-  }
-  
-  return F;
-}
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto-&gt;Codegen();
-  if (TheFunction == 0)
-    return 0;
-  
-  // If this is an operator, install it.
-  if (Proto-&gt;isBinaryOp())
-    BinopPrecedence[Proto-&gt;getOperatorName()] = Proto-&gt;getBinaryPrecedence();
-  
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  if (Value *RetVal = Body-&gt;Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    // Optimize the function.
-    TheFPM-&gt;run(*TheFunction);
-    
-    return TheFunction;
-  }
-  
-  // Error reading body, remove function.
-  TheFunction-&gt;eraseFromParent();
-
-  if (Proto-&gt;isBinaryOp())
-    BinopPrecedence.erase(Proto-&gt;getOperatorName());
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static ExecutionEngine *TheExecutionEngine;
-
-static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F-&gt;Codegen()) {
-      fprintf(stderr, "Read function definition:");
-      LF-&gt;dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P-&gt;Codegen()) {
-      fprintf(stderr, "Read extern: ");
-      F-&gt;dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F-&gt;Codegen()) {
-      // JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine-&gt;getPointerToFunction(LF);
-      
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
-      fprintf(stderr, "Evaluated to %f\n", FP());
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready&gt; ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C" 
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-/// printd - printf that takes a double prints it as "%f\n", returning 0.
-extern "C" 
-double printd(double X) {
-  printf("%f\n", X);
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
-  InitializeNativeTarget();
-  LLVMContext &amp;Context = getGlobalContext();
-
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['&lt;'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-
-  // Prime the first token.
-  fprintf(stderr, "ready&gt; ");
-  getNextToken();
-
-  // Make the module, which holds all the code.
-  TheModule = new Module("my cool jit", Context);
-
-  // Create the JIT.  This takes ownership of the module.
-  std::string ErrStr;
-  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&amp;ErrStr).create();
-  if (!TheExecutionEngine) {
-    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
-    exit(1);
-  }
-
-  FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  OurFPM.add(new DataLayout(*TheExecutionEngine-&gt;getDataLayout()));
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &amp;OurFPM;
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-
-  TheFPM = 0;
-
-  // Print out all of the generated code.
-  TheModule-&gt;dump();
-
-  return 0;
-}
-</pre>
-</div>
-
-<a href="LangImpl7.html">Next: Extending the language: mutable variables / SSA construction</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl6.rst b/docs/tutorial/LangImpl6.rst
new file mode 100644
index 0000000000..30f4e90d03
--- /dev/null
+++ b/docs/tutorial/LangImpl6.rst
@@ -0,0 +1,1728 @@
+============================================================
+Kaleidoscope: Extending the Language: User-defined Operators
+============================================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_
+
+Chapter 6 Introduction
+======================
+
+Welcome to Chapter 6 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. At this point in our tutorial, we now
+have a fully functional language that is fairly minimal, but also
+useful. There is still one big problem with it, however. Our language
+doesn't have many useful operators (like division, logical negation, or
+even any comparisons besides less-than).
+
+This chapter of the tutorial takes a wild digression into adding
+user-defined operators to the simple and beautiful Kaleidoscope
+language. This digression now gives us a simple and ugly language in
+some ways, but also a powerful one at the same time. One of the great
+things about creating your own language is that you get to decide what
+is good or bad. In this tutorial we'll assume that it is okay to use
+this as a way to show some interesting parsing techniques.
+
+At the end of this tutorial, we'll run through an example Kaleidoscope
+application that `renders the Mandelbrot set <#example>`_. This gives an
+example of what you can build with Kaleidoscope and its feature set.
+
+User-defined Operators: the Idea
+================================
+
+The "operator overloading" that we will add to Kaleidoscope is more
+general than languages like C++. In C++, you are only allowed to
+redefine existing operators: you can't programatically change the
+grammar, introduce new operators, change precedence levels, etc. In this
+chapter, we will add this capability to Kaleidoscope, which will let the
+user round out the set of operators that are supported.
+
+The point of going into user-defined operators in a tutorial like this
+is to show the power and flexibility of using a hand-written parser.
+Thus far, the parser we have been implementing uses recursive descent
+for most parts of the grammar and operator precedence parsing for the
+expressions. See `Chapter 2 <LangImpl2.html>`_ for details. Without
+using operator precedence parsing, it would be very difficult to allow
+the programmer to introduce new operators into the grammar: the grammar
+is dynamically extensible as the JIT runs.
+
+The two specific features we'll add are programmable unary operators
+(right now, Kaleidoscope has no unary operators at all) as well as
+binary operators. An example of this is:
+
+::
+
+    # Logical unary not.
+    def unary!(v)
+      if v then
+        0
+      else
+        1;
+
+    # Define > with the same precedence as <.
+    def binary> 10 (LHS RHS)
+      RHS < LHS;
+
+    # Binary "logical or", (note that it does not "short circuit")
+    def binary| 5 (LHS RHS)
+      if LHS then
+        1
+      else if RHS then
+        1
+      else
+        0;
+
+    # Define = with slightly lower precedence than relationals.
+    def binary= 9 (LHS RHS)
+      !(LHS < RHS | LHS > RHS);
+
+Many languages aspire to being able to implement their standard runtime
+library in the language itself. In Kaleidoscope, we can implement
+significant parts of the language in the library!
+
+We will break down implementation of these features into two parts:
+implementing support for user-defined binary operators and adding unary
+operators.
+
+User-defined Binary Operators
+=============================
+
+Adding support for user-defined binary operators is pretty simple with
+our current framework. We'll first add support for the unary/binary
+keywords:
+
+.. code-block:: c++
+
+    enum Token {
+      ...
+      // operators
+      tok_binary = -11, tok_unary = -12
+    };
+    ...
+    static int gettok() {
+    ...
+        if (IdentifierStr == "for") return tok_for;
+        if (IdentifierStr == "in") return tok_in;
+        if (IdentifierStr == "binary") return tok_binary;
+        if (IdentifierStr == "unary") return tok_unary;
+        return tok_identifier;
+
+This just adds lexer support for the unary and binary keywords, like we
+did in `previous chapters <LangImpl5.html#iflexer>`_. One nice thing
+about our current AST, is that we represent binary operators with full
+generalisation by using their ASCII code as the opcode. For our extended
+operators, we'll use this same representation, so we don't need any new
+AST or parser support.
+
+On the other hand, we have to be able to represent the definitions of
+these new operators, in the "def binary\| 5" part of the function
+definition. In our grammar so far, the "name" for the function
+definition is parsed as the "prototype" production and into the
+``PrototypeAST`` AST node. To represent our new user-defined operators
+as prototypes, we have to extend the ``PrototypeAST`` AST node like
+this:
+
+.. code-block:: c++
+
+    /// PrototypeAST - This class represents the "prototype" for a function,
+    /// which captures its argument names as well as if it is an operator.
+    class PrototypeAST {
+      std::string Name;
+      std::vector<std::string> Args;
+      bool isOperator;
+      unsigned Precedence;  // Precedence if a binary op.
+    public:
+      PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+                   bool isoperator = false, unsigned prec = 0)
+      : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+      bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+      bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+      char getOperatorName() const {
+        assert(isUnaryOp() || isBinaryOp());
+        return Name[Name.size()-1];
+      }
+
+      unsigned getBinaryPrecedence() const { return Precedence; }
+
+      Function *Codegen();
+    };
+
+Basically, in addition to knowing a name for the prototype, we now keep
+track of whether it was an operator, and if it was, what precedence
+level the operator is at. The precedence is only used for binary
+operators (as you'll see below, it just doesn't apply for unary
+operators). Now that we have a way to represent the prototype for a
+user-defined operator, we need to parse it:
+
+.. code-block:: c++
+
+    /// prototype
+    ///   ::= id '(' id* ')'
+    ///   ::= binary LETTER number? (id, id)
+    static PrototypeAST *ParsePrototype() {
+      std::string FnName;
+
+      unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
+      unsigned BinaryPrecedence = 30;
+
+      switch (CurTok) {
+      default:
+        return ErrorP("Expected function name in prototype");
+      case tok_identifier:
+        FnName = IdentifierStr;
+        Kind = 0;
+        getNextToken();
+        break;
+      case tok_binary:
+        getNextToken();
+        if (!isascii(CurTok))
+          return ErrorP("Expected binary operator");
+        FnName = "binary";
+        FnName += (char)CurTok;
+        Kind = 2;
+        getNextToken();
+
+        // Read the precedence if present.
+        if (CurTok == tok_number) {
+          if (NumVal < 1 || NumVal > 100)
+            return ErrorP("Invalid precedecnce: must be 1..100");
+          BinaryPrecedence = (unsigned)NumVal;
+          getNextToken();
+        }
+        break;
+      }
+
+      if (CurTok != '(')
+        return ErrorP("Expected '(' in prototype");
+
+      std::vector<std::string> ArgNames;
+      while (getNextToken() == tok_identifier)
+        ArgNames.push_back(IdentifierStr);
+      if (CurTok != ')')
+        return ErrorP("Expected ')' in prototype");
+
+      // success.
+      getNextToken();  // eat ')'.
+
+      // Verify right number of names for operator.
+      if (Kind && ArgNames.size() != Kind)
+        return ErrorP("Invalid number of operands for operator");
+
+      return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+    }
+
+This is all fairly straightforward parsing code, and we have already
+seen a lot of similar code in the past. One interesting part about the
+code above is the couple lines that set up ``FnName`` for binary
+operators. This builds names like "binary@" for a newly defined "@"
+operator. This then takes advantage of the fact that symbol names in the
+LLVM symbol table are allowed to have any character in them, including
+embedded nul characters.
+
+The next interesting thing to add, is codegen support for these binary
+operators. Given our current structure, this is a simple addition of a
+default case for our existing binary operator node:
+
+.. code-block:: c++
+
+    Value *BinaryExprAST::Codegen() {
+      Value *L = LHS->Codegen();
+      Value *R = RHS->Codegen();
+      if (L == 0 || R == 0) return 0;
+
+      switch (Op) {
+      case '+': return Builder.CreateFAdd(L, R, "addtmp");
+      case '-': return Builder.CreateFSub(L, R, "subtmp");
+      case '*': return Builder.CreateFMul(L, R, "multmp");
+      case '<':
+        L = Builder.CreateFCmpULT(L, R, "cmptmp");
+        // Convert bool 0/1 to double 0.0 or 1.0
+        return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                    "booltmp");
+      default: break;
+      }
+
+      // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+      // a call to it.
+      Function *F = TheModule->getFunction(std::string("binary")+Op);
+      assert(F && "binary operator not found!");
+
+      Value *Ops[2] = { L, R };
+      return Builder.CreateCall(F, Ops, "binop");
+    }
+
+As you can see above, the new code is actually really simple. It just
+does a lookup for the appropriate operator in the symbol table and
+generates a function call to it. Since user-defined operators are just
+built as normal functions (because the "prototype" boils down to a
+function with the right name) everything falls into place.
+
+The final piece of code we are missing, is a bit of top-level magic:
+
+.. code-block:: c++
+
+    Function *FunctionAST::Codegen() {
+      NamedValues.clear();
+
+      Function *TheFunction = Proto->Codegen();
+      if (TheFunction == 0)
+        return 0;
+
+      // If this is an operator, install it.
+      if (Proto->isBinaryOp())
+        BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+
+      // Create a new basic block to start insertion into.
+      BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+      Builder.SetInsertPoint(BB);
+
+      if (Value *RetVal = Body->Codegen()) {
+        ...
+
+Basically, before codegening a function, if it is a user-defined
+operator, we register it in the precedence table. This allows the binary
+operator parsing logic we already have in place to handle it. Since we
+are working on a fully-general operator precedence parser, this is all
+we need to do to "extend the grammar".
+
+Now we have useful user-defined binary operators. This builds a lot on
+the previous framework we built for other operators. Adding unary
+operators is a bit more challenging, because we don't have any framework
+for it yet - lets see what it takes.
+
+User-defined Unary Operators
+============================
+
+Since we don't currently support unary operators in the Kaleidoscope
+language, we'll need to add everything to support them. Above, we added
+simple support for the 'unary' keyword to the lexer. In addition to
+that, we need an AST node:
+
+.. code-block:: c++
+
+    /// UnaryExprAST - Expression class for a unary operator.
+    class UnaryExprAST : public ExprAST {
+      char Opcode;
+      ExprAST *Operand;
+    public:
+      UnaryExprAST(char opcode, ExprAST *operand)
+        : Opcode(opcode), Operand(operand) {}
+      virtual Value *Codegen();
+    };
+
+This AST node is very simple and obvious by now. It directly mirrors the
+binary operator AST node, except that it only has one child. With this,
+we need to add the parsing logic. Parsing a unary operator is pretty
+simple: we'll add a new function to do it:
+
+.. code-block:: c++
+
+    /// unary
+    ///   ::= primary
+    ///   ::= '!' unary
+    static ExprAST *ParseUnary() {
+      // If the current token is not an operator, it must be a primary expr.
+      if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+        return ParsePrimary();
+
+      // If this is a unary operator, read it.
+      int Opc = CurTok;
+      getNextToken();
+      if (ExprAST *Operand = ParseUnary())
+        return new UnaryExprAST(Opc, Operand);
+      return 0;
+    }
+
+The grammar we add is pretty straightforward here. If we see a unary
+operator when parsing a primary operator, we eat the operator as a
+prefix and parse the remaining piece as another unary operator. This
+allows us to handle multiple unary operators (e.g. "!!x"). Note that
+unary operators can't have ambiguous parses like binary operators can,
+so there is no need for precedence information.
+
+The problem with this function, is that we need to call ParseUnary from
+somewhere. To do this, we change previous callers of ParsePrimary to
+call ParseUnary instead:
+
+.. code-block:: c++
+
+    /// binoprhs
+    ///   ::= ('+' unary)*
+    static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+      ...
+        // Parse the unary expression after the binary operator.
+        ExprAST *RHS = ParseUnary();
+        if (!RHS) return 0;
+      ...
+    }
+    /// expression
+    ///   ::= unary binoprhs
+    ///
+    static ExprAST *ParseExpression() {
+      ExprAST *LHS = ParseUnary();
+      if (!LHS) return 0;
+
+      return ParseBinOpRHS(0, LHS);
+    }
+
+With these two simple changes, we are now able to parse unary operators
+and build the AST for them. Next up, we need to add parser support for
+prototypes, to parse the unary operator prototype. We extend the binary
+operator code above with:
+
+.. code-block:: c++
+
+    /// prototype
+    ///   ::= id '(' id* ')'
+    ///   ::= binary LETTER number? (id, id)
+    ///   ::= unary LETTER (id)
+    static PrototypeAST *ParsePrototype() {
+      std::string FnName;
+
+      unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
+      unsigned BinaryPrecedence = 30;
+
+      switch (CurTok) {
+      default:
+        return ErrorP("Expected function name in prototype");
+      case tok_identifier:
+        FnName = IdentifierStr;
+        Kind = 0;
+        getNextToken();
+        break;
+      case tok_unary:
+        getNextToken();
+        if (!isascii(CurTok))
+          return ErrorP("Expected unary operator");
+        FnName = "unary";
+        FnName += (char)CurTok;
+        Kind = 1;
+        getNextToken();
+        break;
+      case tok_binary:
+        ...
+
+As with binary operators, we name unary operators with a name that
+includes the operator character. This assists us at code generation
+time. Speaking of, the final piece we need to add is codegen support for
+unary operators. It looks like this:
+
+.. code-block:: c++
+
+    Value *UnaryExprAST::Codegen() {
+      Value *OperandV = Operand->Codegen();
+      if (OperandV == 0) return 0;
+
+      Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+      if (F == 0)
+        return ErrorV("Unknown unary operator");
+
+      return Builder.CreateCall(F, OperandV, "unop");
+    }
+
+This code is similar to, but simpler than, the code for binary
+operators. It is simpler primarily because it doesn't need to handle any
+predefined operators.
+
+Kicking the Tires
+=================
+
+It is somewhat hard to believe, but with a few simple extensions we've
+covered in the last chapters, we have grown a real-ish language. With
+this, we can do a lot of interesting things, including I/O, math, and a
+bunch of other things. For example, we can now add a nice sequencing
+operator (printd is defined to print out the specified value and a
+newline):
+
+::
+
+    ready> extern printd(x);
+    Read extern:
+    declare double @printd(double)
+
+    ready> def binary : 1 (x y) 0;  # Low-precedence operator that ignores operands.
+    ..
+    ready> printd(123) : printd(456) : printd(789);
+    123.000000
+    456.000000
+    789.000000
+    Evaluated to 0.000000
+
+We can also define a bunch of other "primitive" operations, such as:
+
+::
+
+    # Logical unary not.
+    def unary!(v)
+      if v then
+        0
+      else
+        1;
+
+    # Unary negate.
+    def unary-(v)
+      0-v;
+
+    # Define > with the same precedence as <.
+    def binary> 10 (LHS RHS)
+      RHS < LHS;
+
+    # Binary logical or, which does not short circuit.
+    def binary| 5 (LHS RHS)
+      if LHS then
+        1
+      else if RHS then
+        1
+      else
+        0;
+
+    # Binary logical and, which does not short circuit.
+    def binary& 6 (LHS RHS)
+      if !LHS then
+        0
+      else
+        !!RHS;
+
+    # Define = with slightly lower precedence than relationals.
+    def binary = 9 (LHS RHS)
+      !(LHS < RHS | LHS > RHS);
+
+    # Define ':' for sequencing: as a low-precedence operator that ignores operands
+    # and just returns the RHS.
+    def binary : 1 (x y) y;
+
+Given the previous if/then/else support, we can also define interesting
+functions for I/O. For example, the following prints out a character
+whose "density" reflects the value passed in: the lower the value, the
+denser the character:
+
+::
+
+    ready>
+
+    extern putchard(char)
+    def printdensity(d)
+      if d > 8 then
+        putchard(32)  # ' '
+      else if d > 4 then
+        putchard(46)  # '.'
+      else if d > 2 then
+        putchard(43)  # '+'
+      else
+        putchard(42); # '*'
+    ...
+    ready> printdensity(1): printdensity(2): printdensity(3):
+           printdensity(4): printdensity(5): printdensity(9):
+           putchard(10);
+    **++.
+    Evaluated to 0.000000
+
+Based on these simple primitive operations, we can start to define more
+interesting things. For example, here's a little function that solves
+for the number of iterations it takes a function in the complex plane to
+converge:
+
+::
+
+    # Determine whether the specific location diverges.
+    # Solve for z = z^2 + c in the complex plane.
+    def mandleconverger(real imag iters creal cimag)
+      if iters > 255 | (real*real + imag*imag > 4) then
+        iters
+      else
+        mandleconverger(real*real - imag*imag + creal,
+                        2*real*imag + cimag,
+                        iters+1, creal, cimag);
+
+    # Return the number of iterations required for the iteration to escape
+    def mandleconverge(real imag)
+      mandleconverger(real, imag, 0, real, imag);
+
+This "``z = z2 + c``" function is a beautiful little creature that is
+the basis for computation of the `Mandelbrot
+Set <http://en.wikipedia.org/wiki/Mandelbrot_set>`_. Our
+``mandelconverge`` function returns the number of iterations that it
+takes for a complex orbit to escape, saturating to 255. This is not a
+very useful function by itself, but if you plot its value over a
+two-dimensional plane, you can see the Mandelbrot set. Given that we are
+limited to using putchard here, our amazing graphical output is limited,
+but we can whip together something using the density plotter above:
+
+::
+
+    # Compute and plot the mandlebrot set with the specified 2 dimensional range
+    # info.
+    def mandelhelp(xmin xmax xstep   ymin ymax ystep)
+      for y = ymin, y < ymax, ystep in (
+        (for x = xmin, x < xmax, xstep in
+           printdensity(mandleconverge(x,y)))
+        : putchard(10)
+      )
+
+    # mandel - This is a convenient helper function for plotting the mandelbrot set
+    # from the specified position with the specified Magnification.
+    def mandel(realstart imagstart realmag imagmag)
+      mandelhelp(realstart, realstart+realmag*78, realmag,
+                 imagstart, imagstart+imagmag*40, imagmag);
+
+Given this, we can try plotting out the mandlebrot set! Lets try it out:
+
+::
+
+    ready> mandel(-2.3, -1.3, 0.05, 0.07);
+    *******************************+++++++++++*************************************
+    *************************+++++++++++++++++++++++*******************************
+    **********************+++++++++++++++++++++++++++++****************************
+    *******************+++++++++++++++++++++.. ...++++++++*************************
+    *****************++++++++++++++++++++++.... ...+++++++++***********************
+    ***************+++++++++++++++++++++++.....   ...+++++++++*********************
+    **************+++++++++++++++++++++++....     ....+++++++++********************
+    *************++++++++++++++++++++++......      .....++++++++*******************
+    ************+++++++++++++++++++++.......       .......+++++++******************
+    ***********+++++++++++++++++++....                ... .+++++++*****************
+    **********+++++++++++++++++.......                     .+++++++****************
+    *********++++++++++++++...........                    ...+++++++***************
+    ********++++++++++++............                      ...++++++++**************
+    ********++++++++++... ..........                        .++++++++**************
+    *******+++++++++.....                                   .+++++++++*************
+    *******++++++++......                                  ..+++++++++*************
+    *******++++++.......                                   ..+++++++++*************
+    *******+++++......                                     ..+++++++++*************
+    *******.... ....                                      ...+++++++++*************
+    *******.... .                                         ...+++++++++*************
+    *******+++++......                                    ...+++++++++*************
+    *******++++++.......                                   ..+++++++++*************
+    *******++++++++......                                   .+++++++++*************
+    *******+++++++++.....                                  ..+++++++++*************
+    ********++++++++++... ..........                        .++++++++**************
+    ********++++++++++++............                      ...++++++++**************
+    *********++++++++++++++..........                     ...+++++++***************
+    **********++++++++++++++++........                     .+++++++****************
+    **********++++++++++++++++++++....                ... ..+++++++****************
+    ***********++++++++++++++++++++++.......       .......++++++++*****************
+    ************+++++++++++++++++++++++......      ......++++++++******************
+    **************+++++++++++++++++++++++....      ....++++++++********************
+    ***************+++++++++++++++++++++++.....   ...+++++++++*********************
+    *****************++++++++++++++++++++++....  ...++++++++***********************
+    *******************+++++++++++++++++++++......++++++++*************************
+    *********************++++++++++++++++++++++.++++++++***************************
+    *************************+++++++++++++++++++++++*******************************
+    ******************************+++++++++++++************************************
+    *******************************************************************************
+    *******************************************************************************
+    *******************************************************************************
+    Evaluated to 0.000000
+    ready> mandel(-2, -1, 0.02, 0.04);
+    **************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
+    ***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+    *********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
+    *******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
+    *****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
+    ***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
+    **************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
+    ************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
+    ***********++++++++++++++++++++++++++++++++++++++++++++++++++........        .
+    **********++++++++++++++++++++++++++++++++++++++++++++++.............
+    ********+++++++++++++++++++++++++++++++++++++++++++..................
+    *******+++++++++++++++++++++++++++++++++++++++.......................
+    ******+++++++++++++++++++++++++++++++++++...........................
+    *****++++++++++++++++++++++++++++++++............................
+    *****++++++++++++++++++++++++++++...............................
+    ****++++++++++++++++++++++++++......   .........................
+    ***++++++++++++++++++++++++.........     ......    ...........
+    ***++++++++++++++++++++++............
+    **+++++++++++++++++++++..............
+    **+++++++++++++++++++................
+    *++++++++++++++++++.................
+    *++++++++++++++++............ ...
+    *++++++++++++++..............
+    *+++....++++................
+    *..........  ...........
+    *
+    *..........  ...........
+    *+++....++++................
+    *++++++++++++++..............
+    *++++++++++++++++............ ...
+    *++++++++++++++++++.................
+    **+++++++++++++++++++................
+    **+++++++++++++++++++++..............
+    ***++++++++++++++++++++++............
+    ***++++++++++++++++++++++++.........     ......    ...........
+    ****++++++++++++++++++++++++++......   .........................
+    *****++++++++++++++++++++++++++++...............................
+    *****++++++++++++++++++++++++++++++++............................
+    ******+++++++++++++++++++++++++++++++++++...........................
+    *******+++++++++++++++++++++++++++++++++++++++.......................
+    ********+++++++++++++++++++++++++++++++++++++++++++..................
+    Evaluated to 0.000000
+    ready> mandel(-0.9, -1.4, 0.02, 0.03);
+    *******************************************************************************
+    *******************************************************************************
+    *******************************************************************************
+    **********+++++++++++++++++++++************************************************
+    *+++++++++++++++++++++++++++++++++++++++***************************************
+    +++++++++++++++++++++++++++++++++++++++++++++**********************************
+    ++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
+    ++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
+    +++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
+    +++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
+    +++++++++++++++++++++++++++++++....   ......+++++++++++++++++++****************
+    +++++++++++++++++++++++++++++.......  ........+++++++++++++++++++**************
+    ++++++++++++++++++++++++++++........   ........++++++++++++++++++++************
+    +++++++++++++++++++++++++++.........     ..  ...+++++++++++++++++++++**********
+    ++++++++++++++++++++++++++...........        ....++++++++++++++++++++++********
+    ++++++++++++++++++++++++.............       .......++++++++++++++++++++++******
+    +++++++++++++++++++++++.............        ........+++++++++++++++++++++++****
+    ++++++++++++++++++++++...........           ..........++++++++++++++++++++++***
+    ++++++++++++++++++++...........                .........++++++++++++++++++++++*
+    ++++++++++++++++++............                  ...........++++++++++++++++++++
+    ++++++++++++++++...............                 .............++++++++++++++++++
+    ++++++++++++++.................                 ...............++++++++++++++++
+    ++++++++++++..................                  .................++++++++++++++
+    +++++++++..................                      .................+++++++++++++
+    ++++++........        .                               .........  ..++++++++++++
+    ++............                                         ......    ....++++++++++
+    ..............                                                    ...++++++++++
+    ..............                                                    ....+++++++++
+    ..............                                                    .....++++++++
+    .............                                                    ......++++++++
+    ...........                                                     .......++++++++
+    .........                                                       ........+++++++
+    .........                                                       ........+++++++
+    .........                                                           ....+++++++
+    ........                                                             ...+++++++
+    .......                                                              ...+++++++
+                                                                        ....+++++++
+                                                                       .....+++++++
+                                                                        ....+++++++
+                                                                        ....+++++++
+                                                                        ....+++++++
+    Evaluated to 0.000000
+    ready> ^D
+
+At this point, you may be starting to realize that Kaleidoscope is a
+real and powerful language. It may not be self-similar :), but it can be
+used to plot things that are!
+
+With this, we conclude the "adding user-defined operators" chapter of
+the tutorial. We have successfully augmented our language, adding the
+ability to extend the language in the library, and we have shown how
+this can be used to build a simple but interesting end-user application
+in Kaleidoscope. At this point, Kaleidoscope can build a variety of
+applications that are functional and can call functions with
+side-effects, but it can't actually define and mutate a variable itself.
+
+Strikingly, variable mutation is an important feature of some languages,
+and it is not at all obvious how to `add support for mutable
+variables <LangImpl7.html>`_ without having to add an "SSA construction"
+phase to your front-end. In the next chapter, we will describe how you
+can add variable mutation without building SSA in your front-end.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the if/then/else and for expressions.. To build this example, use:
+
+.. code-block:: bash
+
+    # Compile
+    clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+    # Run
+    ./toy
+
+On some platforms, you will need to specify -rdynamic or
+-Wl,--export-dynamic when linking. This ensures that symbols defined in
+the main executable are exported to the dynamic linker and so are
+available for symbol resolution at run time. This is not needed if you
+compile your support code into a shared library, although doing that
+will cause problems on Windows.
+
+Here is the code:
+
+.. code-block:: c++
+
+    #include "llvm/DerivedTypes.h"
+    #include "llvm/ExecutionEngine/ExecutionEngine.h"
+    #include "llvm/ExecutionEngine/JIT.h"
+    #include "llvm/IRBuilder.h"
+    #include "llvm/LLVMContext.h"
+    #include "llvm/Module.h"
+    #include "llvm/PassManager.h"
+    #include "llvm/Analysis/Verifier.h"
+    #include "llvm/Analysis/Passes.h"
+    #include "llvm/DataLayout.h"
+    #include "llvm/Transforms/Scalar.h"
+    #include "llvm/Support/TargetSelect.h"
+    #include <cstdio>
+    #include <string>
+    #include <map>
+    #include <vector>
+    using namespace llvm;
+
+    //===----------------------------------------------------------------------===//
+    // Lexer
+    //===----------------------------------------------------------------------===//
+
+    // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+    // of these for known things.
+    enum Token {
+      tok_eof = -1,
+
+      // commands
+      tok_def = -2, tok_extern = -3,
+
+      // primary
+      tok_identifier = -4, tok_number = -5,
+
+      // control
+      tok_if = -6, tok_then = -7, tok_else = -8,
+      tok_for = -9, tok_in = -10,
+
+      // operators
+      tok_binary = -11, tok_unary = -12
+    };
+
+    static std::string IdentifierStr;  // Filled in if tok_identifier
+    static double NumVal;              // Filled in if tok_number
+
+    /// gettok - Return the next token from standard input.
+    static int gettok() {
+      static int LastChar = ' ';
+
+      // Skip any whitespace.
+      while (isspace(LastChar))
+        LastChar = getchar();
+
+      if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+        IdentifierStr = LastChar;
+        while (isalnum((LastChar = getchar())))
+          IdentifierStr += LastChar;
+
+        if (IdentifierStr == "def") return tok_def;
+        if (IdentifierStr == "extern") return tok_extern;
+        if (IdentifierStr == "if") return tok_if;
+        if (IdentifierStr == "then") return tok_then;
+        if (IdentifierStr == "else") return tok_else;
+        if (IdentifierStr == "for") return tok_for;
+        if (IdentifierStr == "in") return tok_in;
+        if (IdentifierStr == "binary") return tok_binary;
+        if (IdentifierStr == "unary") return tok_unary;
+        return tok_identifier;
+      }
+
+      if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+        std::string NumStr;
+        do {
+          NumStr += LastChar;
+          LastChar = getchar();
+        } while (isdigit(LastChar) || LastChar == '.');
+
+        NumVal = strtod(NumStr.c_str(), 0);
+        return tok_number;
+      }
+
+      if (LastChar == '#') {
+        // Comment until end of line.
+        do LastChar = getchar();
+        while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+        if (LastChar != EOF)
+          return gettok();
+      }
+
+      // Check for end of file.  Don't eat the EOF.
+      if (LastChar == EOF)
+        return tok_eof;
+
+      // Otherwise, just return the character as its ascii value.
+      int ThisChar = LastChar;
+      LastChar = getchar();
+      return ThisChar;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Abstract Syntax Tree (aka Parse Tree)
+    //===----------------------------------------------------------------------===//
+
+    /// ExprAST - Base class for all expression nodes.
+    class ExprAST {
+    public:
+      virtual ~ExprAST() {}
+      virtual Value *Codegen() = 0;
+    };
+
+    /// NumberExprAST - Expression class for numeric literals like "1.0".
+    class NumberExprAST : public ExprAST {
+      double Val;
+    public:
+      NumberExprAST(double val) : Val(val) {}
+      virtual Value *Codegen();
+    };
+
+    /// VariableExprAST - Expression class for referencing a variable, like "a".
+    class VariableExprAST : public ExprAST {
+      std::string Name;
+    public:
+      VariableExprAST(const std::string &name) : Name(name) {}
+      virtual Value *Codegen();
+    };
+
+    /// UnaryExprAST - Expression class for a unary operator.
+    class UnaryExprAST : public ExprAST {
+      char Opcode;
+      ExprAST *Operand;
+    public:
+      UnaryExprAST(char opcode, ExprAST *operand)
+        : Opcode(opcode), Operand(operand) {}
+      virtual Value *Codegen();
+    };
+
+    /// BinaryExprAST - Expression class for a binary operator.
+    class BinaryExprAST : public ExprAST {
+      char Op;
+      ExprAST *LHS, *RHS;
+    public:
+      BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+        : Op(op), LHS(lhs), RHS(rhs) {}
+      virtual Value *Codegen();
+    };
+
+    /// CallExprAST - Expression class for function calls.
+    class CallExprAST : public ExprAST {
+      std::string Callee;
+      std::vector<ExprAST*> Args;
+    public:
+      CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+        : Callee(callee), Args(args) {}
+      virtual Value *Codegen();
+    };
+
+    /// IfExprAST - Expression class for if/then/else.
+    class IfExprAST : public ExprAST {
+      ExprAST *Cond, *Then, *Else;
+    public:
+      IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+      : Cond(cond), Then(then), Else(_else) {}
+      virtual Value *Codegen();
+    };
+
+    /// ForExprAST - Expression class for for/in.
+    class ForExprAST : public ExprAST {
+      std::string VarName;
+      ExprAST *Start, *End, *Step, *Body;
+    public:
+      ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+                 ExprAST *step, ExprAST *body)
+        : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+      virtual Value *Codegen();
+    };
+
+    /// PrototypeAST - This class represents the "prototype" for a function,
+    /// which captures its name, and its argument names (thus implicitly the number
+    /// of arguments the function takes), as well as if it is an operator.
+    class PrototypeAST {
+      std::string Name;
+      std::vector<std::string> Args;
+      bool isOperator;
+      unsigned Precedence;  // Precedence if a binary op.
+    public:
+      PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+                   bool isoperator = false, unsigned prec = 0)
+      : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+      bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+      bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+      char getOperatorName() const {
+        assert(isUnaryOp() || isBinaryOp());
+        return Name[Name.size()-1];
+      }
+
+      unsigned getBinaryPrecedence() const { return Precedence; }
+
+      Function *Codegen();
+    };
+
+    /// FunctionAST - This class represents a function definition itself.
+    class FunctionAST {
+      PrototypeAST *Proto;
+      ExprAST *Body;
+    public:
+      FunctionAST(PrototypeAST *proto, ExprAST *body)
+        : Proto(proto), Body(body) {}
+
+      Function *Codegen();
+    };
+
+    //===----------------------------------------------------------------------===//
+    // Parser
+    //===----------------------------------------------------------------------===//
+
+    /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+    /// token the parser is looking at.  getNextToken reads another token from the
+    /// lexer and updates CurTok with its results.
+    static int CurTok;
+    static int getNextToken() {
+      return CurTok = gettok();
+    }
+
+    /// BinopPrecedence - This holds the precedence for each binary operator that is
+    /// defined.
+    static std::map<char, int> BinopPrecedence;
+
+    /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+    static int GetTokPrecedence() {
+      if (!isascii(CurTok))
+        return -1;
+
+      // Make sure it's a declared binop.
+      int TokPrec = BinopPrecedence[CurTok];
+      if (TokPrec <= 0) return -1;
+      return TokPrec;
+    }
+
+    /// Error* - These are little helper functions for error handling.
+    ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+    PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+    FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+    static ExprAST *ParseExpression();
+
+    /// identifierexpr
+    ///   ::= identifier
+    ///   ::= identifier '(' expression* ')'
+    static ExprAST *ParseIdentifierExpr() {
+      std::string IdName = IdentifierStr;
+
+      getNextToken();  // eat identifier.
+
+      if (CurTok != '(') // Simple variable ref.
+        return new VariableExprAST(IdName);
+
+      // Call.
+      getNextToken();  // eat (
+      std::vector<ExprAST*> Args;
+      if (CurTok != ')') {
+        while (1) {
+          ExprAST *Arg = ParseExpression();
+          if (!Arg) return 0;
+          Args.push_back(Arg);
+
+          if (CurTok == ')') break;
+
+          if (CurTok != ',')
+            return Error("Expected ')' or ',' in argument list");
+          getNextToken();
+        }
+      }
+
+      // Eat the ')'.
+      getNextToken();
+
+      return new CallExprAST(IdName, Args);
+    }
+
+    /// numberexpr ::= number
+    static ExprAST *ParseNumberExpr() {
+      ExprAST *Result = new NumberExprAST(NumVal);
+      getNextToken(); // consume the number
+      return Result;
+    }
+
+    /// parenexpr ::= '(' expression ')'
+    static ExprAST *ParseParenExpr() {
+      getNextToken();  // eat (.
+      ExprAST *V = ParseExpression();
+      if (!V) return 0;
+
+      if (CurTok != ')')
+        return Error("expected ')'");
+      getNextToken();  // eat ).
+      return V;
+    }
+
+    /// ifexpr ::= 'if' expression 'then' expression 'else' expression
+    static ExprAST *ParseIfExpr() {
+      getNextToken();  // eat the if.
+
+      // condition.
+      ExprAST *Cond = ParseExpression();
+      if (!Cond) return 0;
+
+      if (CurTok != tok_then)
+        return Error("expected then");
+      getNextToken();  // eat the then
+
+      ExprAST *Then = ParseExpression();
+      if (Then == 0) return 0;
+
+      if (CurTok != tok_else)
+        return Error("expected else");
+
+      getNextToken();
+
+      ExprAST *Else = ParseExpression();
+      if (!Else) return 0;
+
+      return new IfExprAST(Cond, Then, Else);
+    }
+
+    /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+    static ExprAST *ParseForExpr() {
+      getNextToken();  // eat the for.
+
+      if (CurTok != tok_identifier)
+        return Error("expected identifier after for");
+
+      std::string IdName = IdentifierStr;
+      getNextToken();  // eat identifier.
+
+      if (CurTok != '=')
+        return Error("expected '=' after for");
+      getNextToken();  // eat '='.
+
+
+      ExprAST *Start = ParseExpression();
+      if (Start == 0) return 0;
+      if (CurTok != ',')
+        return Error("expected ',' after for start value");
+      getNextToken();
+
+      ExprAST *End = ParseExpression();
+      if (End == 0) return 0;
+
+      // The step value is optional.
+      ExprAST *Step = 0;
+      if (CurTok == ',') {
+        getNextToken();
+        Step = ParseExpression();
+        if (Step == 0) return 0;
+      }
+
+      if (CurTok != tok_in)
+        return Error("expected 'in' after for");
+      getNextToken();  // eat 'in'.
+
+      ExprAST *Body = ParseExpression();
+      if (Body == 0) return 0;
+
+      return new ForExprAST(IdName, Start, End, Step, Body);
+    }
+
+    /// primary
+    ///   ::= identifierexpr
+    ///   ::= numberexpr
+    ///   ::= parenexpr
+    ///   ::= ifexpr
+    ///   ::= forexpr
+    static ExprAST *ParsePrimary() {
+      switch (CurTok) {
+      default: return Error("unknown token when expecting an expression");
+      case tok_identifier: return ParseIdentifierExpr();
+      case tok_number:     return ParseNumberExpr();
+      case '(':            return ParseParenExpr();
+      case tok_if:         return ParseIfExpr();
+      case tok_for:        return ParseForExpr();
+      }
+    }
+
+    /// unary
+    ///   ::= primary
+    ///   ::= '!' unary
+    static ExprAST *ParseUnary() {
+      // If the current token is not an operator, it must be a primary expr.
+      if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+        return ParsePrimary();
+
+      // If this is a unary operator, read it.
+      int Opc = CurTok;
+      getNextToken();
+      if (ExprAST *Operand = ParseUnary())
+        return new UnaryExprAST(Opc, Operand);
+      return 0;
+    }
+
+    /// binoprhs
+    ///   ::= ('+' unary)*
+    static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+      // If this is a binop, find its precedence.
+      while (1) {
+        int TokPrec = GetTokPrecedence();
+
+        // If this is a binop that binds at least as tightly as the current binop,
+        // consume it, otherwise we are done.
+        if (TokPrec < ExprPrec)
+          return LHS;
+
+        // Okay, we know this is a binop.
+        int BinOp = CurTok;
+        getNextToken();  // eat binop
+
+        // Parse the unary expression after the binary operator.
+        ExprAST *RHS = ParseUnary();
+        if (!RHS) return 0;
+
+        // If BinOp binds less tightly with RHS than the operator after RHS, let
+        // the pending operator take RHS as its LHS.
+        int NextPrec = GetTokPrecedence();
+        if (TokPrec < NextPrec) {
+          RHS = ParseBinOpRHS(TokPrec+1, RHS);
+          if (RHS == 0) return 0;
+        }
+
+        // Merge LHS/RHS.
+        LHS = new BinaryExprAST(BinOp, LHS, RHS);
+      }
+    }
+
+    /// expression
+    ///   ::= unary binoprhs
+    ///
+    static ExprAST *ParseExpression() {
+      ExprAST *LHS = ParseUnary();
+      if (!LHS) return 0;
+
+      return ParseBinOpRHS(0, LHS);
+    }
+
+    /// prototype
+    ///   ::= id '(' id* ')'
+    ///   ::= binary LETTER number? (id, id)
+    ///   ::= unary LETTER (id)
+    static PrototypeAST *ParsePrototype() {
+      std::string FnName;
+
+      unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+      unsigned BinaryPrecedence = 30;
+
+      switch (CurTok) {
+      default:
+        return ErrorP("Expected function name in prototype");
+      case tok_identifier:
+        FnName = IdentifierStr;
+        Kind = 0;
+        getNextToken();
+        break;
+      case tok_unary:
+        getNextToken();
+        if (!isascii(CurTok))
+          return ErrorP("Expected unary operator");
+        FnName = "unary";
+        FnName += (char)CurTok;
+        Kind = 1;
+        getNextToken();
+        break;
+      case tok_binary:
+        getNextToken();
+        if (!isascii(CurTok))
+          return ErrorP("Expected binary operator");
+        FnName = "binary";
+        FnName += (char)CurTok;
+        Kind = 2;
+        getNextToken();
+
+        // Read the precedence if present.
+        if (CurTok == tok_number) {
+          if (NumVal < 1 || NumVal > 100)
+            return ErrorP("Invalid precedecnce: must be 1..100");
+          BinaryPrecedence = (unsigned)NumVal;
+          getNextToken();
+        }
+        break;
+      }
+
+      if (CurTok != '(')
+        return ErrorP("Expected '(' in prototype");
+
+      std::vector<std::string> ArgNames;
+      while (getNextToken() == tok_identifier)
+        ArgNames.push_back(IdentifierStr);
+      if (CurTok != ')')
+        return ErrorP("Expected ')' in prototype");
+
+      // success.
+      getNextToken();  // eat ')'.
+
+      // Verify right number of names for operator.
+      if (Kind && ArgNames.size() != Kind)
+        return ErrorP("Invalid number of operands for operator");
+
+      return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+    }
+
+    /// definition ::= 'def' prototype expression
+    static FunctionAST *ParseDefinition() {
+      getNextToken();  // eat def.
+      PrototypeAST *Proto = ParsePrototype();
+      if (Proto == 0) return 0;
+
+      if (ExprAST *E = ParseExpression())
+        return new FunctionAST(Proto, E);
+      return 0;
+    }
+
+    /// toplevelexpr ::= expression
+    static FunctionAST *ParseTopLevelExpr() {
+      if (ExprAST *E = ParseExpression()) {
+        // Make an anonymous proto.
+        PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+        return new FunctionAST(Proto, E);
+      }
+      return 0;
+    }
+
+    /// external ::= 'extern' prototype
+    static PrototypeAST *ParseExtern() {
+      getNextToken();  // eat extern.
+      return ParsePrototype();
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Code Generation
+    //===----------------------------------------------------------------------===//
+
+    static Module *TheModule;
+    static IRBuilder<> Builder(getGlobalContext());
+    static std::map<std::string, Value*> NamedValues;
+    static FunctionPassManager *TheFPM;
+
+    Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+    Value *NumberExprAST::Codegen() {
+      return ConstantFP::get(getGlobalContext(), APFloat(Val));
+    }
+
+    Value *VariableExprAST::Codegen() {
+      // Look this variable up in the function.
+      Value *V = NamedValues[Name];
+      return V ? V : ErrorV("Unknown variable name");
+    }
+
+    Value *UnaryExprAST::Codegen() {
+      Value *OperandV = Operand->Codegen();
+      if (OperandV == 0) return 0;
+
+      Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+      if (F == 0)
+        return ErrorV("Unknown unary operator");
+
+      return Builder.CreateCall(F, OperandV, "unop");
+    }
+
+    Value *BinaryExprAST::Codegen() {
+      Value *L = LHS->Codegen();
+      Value *R = RHS->Codegen();
+      if (L == 0 || R == 0) return 0;
+
+      switch (Op) {
+      case '+': return Builder.CreateFAdd(L, R, "addtmp");
+      case '-': return Builder.CreateFSub(L, R, "subtmp");
+      case '*': return Builder.CreateFMul(L, R, "multmp");
+      case '<':
+        L = Builder.CreateFCmpULT(L, R, "cmptmp");
+        // Convert bool 0/1 to double 0.0 or 1.0
+        return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                    "booltmp");
+      default: break;
+      }
+
+      // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+      // a call to it.
+      Function *F = TheModule->getFunction(std::string("binary")+Op);
+      assert(F && "binary operator not found!");
+
+      Value *Ops[2] = { L, R };
+      return Builder.CreateCall(F, Ops, "binop");
+    }
+
+    Value *CallExprAST::Codegen() {
+      // Look up the name in the global module table.
+      Function *CalleeF = TheModule->getFunction(Callee);
+      if (CalleeF == 0)
+        return ErrorV("Unknown function referenced");
+
+      // If argument mismatch error.
+      if (CalleeF->arg_size() != Args.size())
+        return ErrorV("Incorrect # arguments passed");
+
+      std::vector<Value*> ArgsV;
+      for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+        ArgsV.push_back(Args[i]->Codegen());
+        if (ArgsV.back() == 0) return 0;
+      }
+
+      return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+    }
+
+    Value *IfExprAST::Codegen() {
+      Value *CondV = Cond->Codegen();
+      if (CondV == 0) return 0;
+
+      // Convert condition to a bool by comparing equal to 0.0.
+      CondV = Builder.CreateFCmpONE(CondV,
+                                  ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                    "ifcond");
+
+      Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+      // Create blocks for the then and else cases.  Insert the 'then' block at the
+      // end of the function.
+      BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+      BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+      BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+      Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+      // Emit then value.
+      Builder.SetInsertPoint(ThenBB);
+
+      Value *ThenV = Then->Codegen();
+      if (ThenV == 0) return 0;
+
+      Builder.CreateBr(MergeBB);
+      // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+      ThenBB = Builder.GetInsertBlock();
+
+      // Emit else block.
+      TheFunction->getBasicBlockList().push_back(ElseBB);
+      Builder.SetInsertPoint(ElseBB);
+
+      Value *ElseV = Else->Codegen();
+      if (ElseV == 0) return 0;
+
+      Builder.CreateBr(MergeBB);
+      // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+      ElseBB = Builder.GetInsertBlock();
+
+      // Emit merge block.
+      TheFunction->getBasicBlockList().push_back(MergeBB);
+      Builder.SetInsertPoint(MergeBB);
+      PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
+                                      "iftmp");
+
+      PN->addIncoming(ThenV, ThenBB);
+      PN->addIncoming(ElseV, ElseBB);
+      return PN;
+    }
+
+    Value *ForExprAST::Codegen() {
+      // Output this as:
+      //   ...
+      //   start = startexpr
+      //   goto loop
+      // loop:
+      //   variable = phi [start, loopheader], [nextvariable, loopend]
+      //   ...
+      //   bodyexpr
+      //   ...
+      // loopend:
+      //   step = stepexpr
+      //   nextvariable = variable + step
+      //   endcond = endexpr
+      //   br endcond, loop, endloop
+      // outloop:
+
+      // Emit the start code first, without 'variable' in scope.
+      Value *StartVal = Start->Codegen();
+      if (StartVal == 0) return 0;
+
+      // Make the new basic block for the loop header, inserting after current
+      // block.
+      Function *TheFunction = Builder.GetInsertBlock()->getParent();
+      BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+      BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+      // Insert an explicit fall through from the current block to the LoopBB.
+      Builder.CreateBr(LoopBB);
+
+      // Start insertion in LoopBB.
+      Builder.SetInsertPoint(LoopBB);
+
+      // Start the PHI node with an entry for Start.
+      PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
+      Variable->addIncoming(StartVal, PreheaderBB);
+
+      // Within the loop, the variable is defined equal to the PHI node.  If it
+      // shadows an existing variable, we have to restore it, so save it now.
+      Value *OldVal = NamedValues[VarName];
+      NamedValues[VarName] = Variable;
+
+      // Emit the body of the loop.  This, like any other expr, can change the
+      // current BB.  Note that we ignore the value computed by the body, but don't
+      // allow an error.
+      if (Body->Codegen() == 0)
+        return 0;
+
+      // Emit the step value.
+      Value *StepVal;
+      if (Step) {
+        StepVal = Step->Codegen();
+        if (StepVal == 0) return 0;
+      } else {
+        // If not specified, use 1.0.
+        StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+      }
+
+      Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
+
+      // Compute the end condition.
+      Value *EndCond = End->Codegen();
+      if (EndCond == 0) return EndCond;
+
+      // Convert condition to a bool by comparing equal to 0.0.
+      EndCond = Builder.CreateFCmpONE(EndCond,
+                                  ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                      "loopcond");
+
+      // Create the "after loop" block and insert it.
+      BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+      BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+      // Insert the conditional branch into the end of LoopEndBB.
+      Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+      // Any new code will be inserted in AfterBB.
+      Builder.SetInsertPoint(AfterBB);
+
+      // Add a new entry to the PHI node for the backedge.
+      Variable->addIncoming(NextVar, LoopEndBB);
+
+      // Restore the unshadowed variable.
+      if (OldVal)
+        NamedValues[VarName] = OldVal;
+      else
+        NamedValues.erase(VarName);
+
+
+      // for expr always returns 0.0.
+      return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+    }
+
+    Function *PrototypeAST::Codegen() {
+      // Make the function type:  double(double,double) etc.
+      std::vector<Type*> Doubles(Args.size(),
+                                 Type::getDoubleTy(getGlobalContext()));
+      FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                           Doubles, false);
+
+      Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+      // If F conflicted, there was already something named 'Name'.  If it has a
+      // body, don't allow redefinition or reextern.
+      if (F->getName() != Name) {
+        // Delete the one we just made and get the existing one.
+        F->eraseFromParent();
+        F = TheModule->getFunction(Name);
+
+        // If F already has a body, reject this.
+        if (!F->empty()) {
+          ErrorF("redefinition of function");
+          return 0;
+        }
+
+        // If F took a different number of args, reject.
+        if (F->arg_size() != Args.size()) {
+          ErrorF("redefinition of function with different # args");
+          return 0;
+        }
+      }
+
+      // Set names for all arguments.
+      unsigned Idx = 0;
+      for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+           ++AI, ++Idx) {
+        AI->setName(Args[Idx]);
+
+        // Add arguments to variable symbol table.
+        NamedValues[Args[Idx]] = AI;
+      }
+
+      return F;
+    }
+
+    Function *FunctionAST::Codegen() {
+      NamedValues.clear();
+
+      Function *TheFunction = Proto->Codegen();
+      if (TheFunction == 0)
+        return 0;
+
+      // If this is an operator, install it.
+      if (Proto->isBinaryOp())
+        BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+
+      // Create a new basic block to start insertion into.
+      BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+      Builder.SetInsertPoint(BB);
+
+      if (Value *RetVal = Body->Codegen()) {
+        // Finish off the function.
+        Builder.CreateRet(RetVal);
+
+        // Validate the generated code, checking for consistency.
+        verifyFunction(*TheFunction);
+
+        // Optimize the function.
+        TheFPM->run(*TheFunction);
+
+        return TheFunction;
+      }
+
+      // Error reading body, remove function.
+      TheFunction->eraseFromParent();
+
+      if (Proto->isBinaryOp())
+        BinopPrecedence.erase(Proto->getOperatorName());
+      return 0;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Top-Level parsing and JIT Driver
+    //===----------------------------------------------------------------------===//
+
+    static ExecutionEngine *TheExecutionEngine;
+
+    static void HandleDefinition() {
+      if (FunctionAST *F = ParseDefinition()) {
+        if (Function *LF = F->Codegen()) {
+          fprintf(stderr, "Read function definition:");
+          LF->dump();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleExtern() {
+      if (PrototypeAST *P = ParseExtern()) {
+        if (Function *F = P->Codegen()) {
+          fprintf(stderr, "Read extern: ");
+          F->dump();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleTopLevelExpression() {
+      // Evaluate a top-level expression into an anonymous function.
+      if (FunctionAST *F = ParseTopLevelExpr()) {
+        if (Function *LF = F->Codegen()) {
+          // JIT the function, returning a function pointer.
+          void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+          // Cast it to the right type (takes no arguments, returns a double) so we
+          // can call it as a native function.
+          double (*FP)() = (double (*)())(intptr_t)FPtr;
+          fprintf(stderr, "Evaluated to %f\n", FP());
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    /// top ::= definition | external | expression | ';'
+    static void MainLoop() {
+      while (1) {
+        fprintf(stderr, "ready> ");
+        switch (CurTok) {
+        case tok_eof:    return;
+        case ';':        getNextToken(); break;  // ignore top-level semicolons.
+        case tok_def:    HandleDefinition(); break;
+        case tok_extern: HandleExtern(); break;
+        default:         HandleTopLevelExpression(); break;
+        }
+      }
+    }
+
+    //===----------------------------------------------------------------------===//
+    // "Library" functions that can be "extern'd" from user code.
+    //===----------------------------------------------------------------------===//
+
+    /// putchard - putchar that takes a double and returns 0.
+    extern "C"
+    double putchard(double X) {
+      putchar((char)X);
+      return 0;
+    }
+
+    /// printd - printf that takes a double prints it as "%f\n", returning 0.
+    extern "C"
+    double printd(double X) {
+      printf("%f\n", X);
+      return 0;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Main driver code.
+    //===----------------------------------------------------------------------===//
+
+    int main() {
+      InitializeNativeTarget();
+      LLVMContext &Context = getGlobalContext();
+
+      // Install standard binary operators.
+      // 1 is lowest precedence.
+      BinopPrecedence['<'] = 10;
+      BinopPrecedence['+'] = 20;
+      BinopPrecedence['-'] = 20;
+      BinopPrecedence['*'] = 40;  // highest.
+
+      // Prime the first token.
+      fprintf(stderr, "ready> ");
+      getNextToken();
+
+      // Make the module, which holds all the code.
+      TheModule = new Module("my cool jit", Context);
+
+      // Create the JIT.  This takes ownership of the module.
+      std::string ErrStr;
+      TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+      if (!TheExecutionEngine) {
+        fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+        exit(1);
+      }
+
+      FunctionPassManager OurFPM(TheModule);
+
+      // Set up the optimizer pipeline.  Start with registering info about how the
+      // target lays out data structures.
+      OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
+      // Provide basic AliasAnalysis support for GVN.
+      OurFPM.add(createBasicAliasAnalysisPass());
+      // Do simple "peephole" optimizations and bit-twiddling optzns.
+      OurFPM.add(createInstructionCombiningPass());
+      // Reassociate expressions.
+      OurFPM.add(createReassociatePass());
+      // Eliminate Common SubExpressions.
+      OurFPM.add(createGVNPass());
+      // Simplify the control flow graph (deleting unreachable blocks, etc).
+      OurFPM.add(createCFGSimplificationPass());
+
+      OurFPM.doInitialization();
+
+      // Set the global so the code gen can use this.
+      TheFPM = &OurFPM;
+
+      // Run the main "interpreter loop" now.
+      MainLoop();
+
+      TheFPM = 0;
+
+      // Print out all of the generated code.
+      TheModule->dump();
+
+      return 0;
+    }
+
+`Next: Extending the language: mutable variables / SSA
+construction <LangImpl7.html>`_
+
diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html
deleted file mode 100644
index 8fa99b1903..0000000000
--- a/docs/tutorial/LangImpl7.html
+++ /dev/null
@@ -1,2164 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Extending the Language: Mutable Variables / SSA
-         construction</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Extending the Language: Mutable Variables</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 7
-  <ol>
-    <li><a href="#intro">Chapter 7 Introduction</a></li>
-    <li><a href="#why">Why is this a hard problem?</a></li>
-    <li><a href="#memory">Memory in LLVM</a></li>
-    <li><a href="#kalvars">Mutable Variables in Kaleidoscope</a></li>
-    <li><a href="#adjustments">Adjusting Existing Variables for
-     Mutation</a></li>
-    <li><a href="#assignment">New Assignment Operator</a></li>
-    <li><a href="#localvars">User-defined Local Variables</a></li>
-    <li><a href="#code">Full Code Listing</a></li>
-  </ol>
-</li>
-<li><a href="LangImpl8.html">Chapter 8</a>: Conclusion and other useful LLVM
- tidbits</li>
-</ul>
-
-<div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 7 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 7 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial.  In chapters 1 through 6, we've built a very
-respectable, albeit simple, <a 
-href="http://en.wikipedia.org/wiki/Functional_programming">functional
-programming language</a>.  In our journey, we learned some parsing techniques,
-how to build and represent an AST, how to build LLVM IR, and how to optimize
-the resultant code as well as JIT compile it.</p>
-
-<p>While Kaleidoscope is interesting as a functional language, the fact that it
-is functional makes it "too easy" to generate LLVM IR for it.  In particular, a 
-functional language makes it very easy to build LLVM IR directly in <a 
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">SSA form</a>.
-Since LLVM requires that the input code be in SSA form, this is a very nice
-property and it is often unclear to newcomers how to generate code for an
-imperative language with mutable variables.</p>
-
-<p>The short (and happy) summary of this chapter is that there is no need for
-your front-end to build SSA form: LLVM provides highly tuned and well tested
-support for this, though the way it works is a bit unexpected for some.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="why">Why is this a hard problem?</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-To understand why mutable variables cause complexities in SSA construction, 
-consider this extremely simple C example:
-</p>
-
-<div class="doc_code">
-<pre>
-int G, H;
-int test(_Bool Condition) {
-  int X;
-  if (Condition)
-    X = G;
-  else
-    X = H;
-  return X;
-}
-</pre>
-</div>
-
-<p>In this case, we have the variable "X", whose value depends on the path 
-executed in the program.  Because there are two different possible values for X
-before the return instruction, a PHI node is inserted to merge the two values.
-The LLVM IR that we want for this example looks like this:</p>
-
-<div class="doc_code">
-<pre>
-@G = weak global i32 0   ; type of @G is i32*
-@H = weak global i32 0   ; type of @H is i32*
-
-define i32 @test(i1 %Condition) {
-entry:
-  br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
-  %X.0 = load i32* @G
-  br label %cond_next
-
-cond_false:
-  %X.1 = load i32* @H
-  br label %cond_next
-
-cond_next:
-  %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
-  ret i32 %X.2
-}
-</pre>
-</div>
-
-<p>In this example, the loads from the G and H global variables are explicit in
-the LLVM IR, and they live in the then/else branches of the if statement
-(cond_true/cond_false).  In order to merge the incoming values, the X.2 phi node
-in the cond_next block selects the right value to use based on where control 
-flow is coming from: if control flow comes from the cond_false block, X.2 gets
-the value of X.1.  Alternatively, if control flow comes from cond_true, it gets
-the value of X.0.  The intent of this chapter is not to explain the details of
-SSA form.  For more information, see one of the many <a 
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">online 
-references</a>.</p>
-
-<p>The question for this article is "who places the phi nodes when lowering 
-assignments to mutable variables?".  The issue here is that LLVM 
-<em>requires</em> that its IR be in SSA form: there is no "non-ssa" mode for it.
-However, SSA construction requires non-trivial algorithms and data structures,
-so it is inconvenient and wasteful for every front-end to have to reproduce this
-logic.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="memory">Memory in LLVM</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The 'trick' here is that while LLVM does require all register values to be
-in SSA form, it does not require (or permit) memory objects to be in SSA form.
-In the example above, note that the loads from G and H are direct accesses to
-G and H: they are not renamed or versioned.  This differs from some other
-compiler systems, which do try to version memory objects.  In LLVM, instead of
-encoding dataflow analysis of memory into the LLVM IR, it is handled with <a 
-href="../WritingAnLLVMPass.html">Analysis Passes</a> which are computed on
-demand.</p>
-
-<p>
-With this in mind, the high-level idea is that we want to make a stack variable
-(which lives in memory, because it is on the stack) for each mutable object in
-a function.  To take advantage of this trick, we need to talk about how LLVM
-represents stack variables.
-</p>
-
-<p>In LLVM, all memory accesses are explicit with load/store instructions, and
-it is carefully designed not to have (or need) an "address-of" operator.  Notice
-how the type of the @G/@H global variables is actually "i32*" even though the 
-variable is defined as "i32".  What this means is that @G defines <em>space</em>
-for an i32 in the global data area, but its <em>name</em> actually refers to the
-address for that space.  Stack variables work the same way, except that instead of 
-being declared with global variable definitions, they are declared with the 
-<a href="../LangRef.html#i_alloca">LLVM alloca instruction</a>:</p>
-
-<div class="doc_code">
-<pre>
-define i32 @example() {
-entry:
-  %X = alloca i32           ; type of %X is i32*.
-  ...
-  %tmp = load i32* %X       ; load the stack value %X from the stack.
-  %tmp2 = add i32 %tmp, 1   ; increment it
-  store i32 %tmp2, i32* %X  ; store it back
-  ...
-</pre>
-</div>
-
-<p>This code shows an example of how you can declare and manipulate a stack
-variable in the LLVM IR.  Stack memory allocated with the alloca instruction is
-fully general: you can pass the address of the stack slot to functions, you can
-store it in other variables, etc.  In our example above, we could rewrite the
-example to use the alloca technique to avoid using a PHI node:</p>
-
-<div class="doc_code">
-<pre>
-@G = weak global i32 0   ; type of @G is i32*
-@H = weak global i32 0   ; type of @H is i32*
-
-define i32 @test(i1 %Condition) {
-entry:
-  %X = alloca i32           ; type of %X is i32*.
-  br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
-  %X.0 = load i32* @G
-  store i32 %X.0, i32* %X   ; Update X
-  br label %cond_next
-
-cond_false:
-  %X.1 = load i32* @H
-  store i32 %X.1, i32* %X   ; Update X
-  br label %cond_next
-
-cond_next:
-  %X.2 = load i32* %X       ; Read X
-  ret i32 %X.2
-}
-</pre>
-</div>
-
-<p>With this, we have discovered a way to handle arbitrary mutable variables
-without the need to create Phi nodes at all:</p>
-
-<ol>
-<li>Each mutable variable becomes a stack allocation.</li>
-<li>Each read of the variable becomes a load from the stack.</li>
-<li>Each update of the variable becomes a store to the stack.</li>
-<li>Taking the address of a variable just uses the stack address directly.</li>
-</ol>
-
-<p>While this solution has solved our immediate problem, it introduced another
-one: we have now apparently introduced a lot of stack traffic for very simple
-and common operations, a major performance problem.  Fortunately for us, the
-LLVM optimizer has a highly-tuned optimization pass named "mem2reg" that handles
-this case, promoting allocas like this into SSA registers, inserting Phi nodes
-as appropriate.  If you run this example through the pass, for example, you'll
-get:</p>
-
-<div class="doc_code">
-<pre>
-$ <b>llvm-as &lt; example.ll | opt -mem2reg | llvm-dis</b>
-@G = weak global i32 0
-@H = weak global i32 0
-
-define i32 @test(i1 %Condition) {
-entry:
-  br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
-  %X.0 = load i32* @G
-  br label %cond_next
-
-cond_false:
-  %X.1 = load i32* @H
-  br label %cond_next
-
-cond_next:
-  %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
-  ret i32 %X.01
-}
-</pre>
-</div>
-
-<p>The mem2reg pass implements the standard "iterated dominance frontier"
-algorithm for constructing SSA form and has a number of optimizations that speed
-up (very common) degenerate cases. The mem2reg optimization pass is the answer to dealing 
-with mutable variables, and we highly recommend that you depend on it.  Note that
-mem2reg only works on variables in certain circumstances:</p>
-
-<ol>
-<li>mem2reg is alloca-driven: it looks for allocas and if it can handle them, it
-promotes them.  It does not apply to global variables or heap allocations.</li>
-
-<li>mem2reg only looks for alloca instructions in the entry block of the
-function.  Being in the entry block guarantees that the alloca is only executed
-once, which makes analysis simpler.</li>
-
-<li>mem2reg only promotes allocas whose uses are direct loads and stores.  If
-the address of the stack object is passed to a function, or if any funny pointer
-arithmetic is involved, the alloca will not be promoted.</li>
-
-<li>mem2reg only works on allocas of <a 
-href="../LangRef.html#t_classifications">first class</a> 
-values (such as pointers, scalars and vectors), and only if the array size
-of the allocation is 1 (or missing in the .ll file).  mem2reg is not capable of
-promoting structs or arrays to registers.  Note that the "scalarrepl" pass is
-more powerful and can promote structs, "unions", and arrays in many cases.</li>
-
-</ol>
-
-<p>
-All of these properties are easy to satisfy for most imperative languages, and
-we'll illustrate it below with Kaleidoscope.  The final question you may be
-asking is: should I bother with this nonsense for my front-end?  Wouldn't it be
-better if I just did SSA construction directly, avoiding use of the mem2reg
-optimization pass?  In short, we strongly recommend that you use this technique
-for building SSA form, unless there is an extremely good reason not to.  Using
-this technique is:</p>
-
-<ul>
-<li>Proven and well tested: llvm-gcc and clang both use this technique for local
-mutable variables.  As such, the most common clients of LLVM are using this to
-handle a bulk of their variables.  You can be sure that bugs are found fast and
-fixed early.</li>
-
-<li>Extremely Fast: mem2reg has a number of special cases that make it fast in
-common cases as well as fully general.  For example, it has fast-paths for
-variables that are only used in a single block, variables that only have one
-assignment point, good heuristics to avoid insertion of unneeded phi nodes, etc.
-</li>
-
-<li>Needed for debug info generation: <a href="../SourceLevelDebugging.html">
-Debug information in LLVM</a> relies on having the address of the variable
-exposed so that debug info can be attached to it.  This technique dovetails 
-very naturally with this style of debug info.</li>
-</ul>
-
-<p>If nothing else, this makes it much easier to get your front-end up and 
-running, and is very simple to implement.  Lets extend Kaleidoscope with mutable
-variables now!
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="kalvars">Mutable Variables in Kaleidoscope</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we know the sort of problem we want to tackle, lets see what this
-looks like in the context of our little Kaleidoscope language.  We're going to
-add two features:</p>
-
-<ol>
-<li>The ability to mutate variables with the '=' operator.</li>
-<li>The ability to define new variables.</li>
-</ol>
-
-<p>While the first item is really what this is about, we only have variables
-for incoming arguments as well as for induction variables, and redefining those only
-goes so far :).  Also, the ability to define new variables is a
-useful thing regardless of whether you will be mutating them.  Here's a
-motivating example that shows how we could use these:</p>
-
-<div class="doc_code">
-<pre>
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-
-# Recursive fib, we could do this before.
-def fib(x)
-  if (x &lt; 3) then
-    1
-  else
-    fib(x-1)+fib(x-2);
-
-# Iterative fib.
-def fibi(x)
-  <b>var a = 1, b = 1, c in</b>
-  (for i = 3, i &lt; x in 
-     <b>c = a + b</b> :
-     <b>a = b</b> :
-     <b>b = c</b>) :
-  b;
-
-# Call it. 
-fibi(10);
-</pre>
-</div>
-
-<p>
-In order to mutate variables, we have to change our existing variables to use
-the "alloca trick".  Once we have that, we'll add our new operator, then extend
-Kaleidoscope to support new variable definitions.
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="adjustments">Adjusting Existing Variables for Mutation</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The symbol table in Kaleidoscope is managed at code generation time by the 
-'<tt>NamedValues</tt>' map.  This map currently keeps track of the LLVM "Value*"
-that holds the double value for the named variable.  In order to support
-mutation, we need to change this slightly, so that it <tt>NamedValues</tt> holds
-the <em>memory location</em> of the variable in question.  Note that this 
-change is a refactoring: it changes the structure of the code, but does not
-(by itself) change the behavior of the compiler.  All of these changes are 
-isolated in the Kaleidoscope code generator.</p>
-
-<p>
-At this point in Kaleidoscope's development, it only supports variables for two
-things: incoming arguments to functions and the induction variable of 'for'
-loops.  For consistency, we'll allow mutation of these variables in addition to
-other user-defined variables.  This means that these will both need memory
-locations.
-</p>
-
-<p>To start our transformation of Kaleidoscope, we'll change the NamedValues
-map so that it maps to AllocaInst* instead of Value*.  Once we do this, the C++ 
-compiler will tell us what parts of the code we need to update:</p>
-
-<div class="doc_code">
-<pre>
-static std::map&lt;std::string, AllocaInst*&gt; NamedValues;
-</pre>
-</div>
-
-<p>Also, since we will need to create these alloca's, we'll use a helper
-function that ensures that the allocas are created in the entry block of the
-function:</p>
-
-<div class="doc_code">
-<pre>
-/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
-/// the function.  This is used for mutable variables etc.
-static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
-                                          const std::string &amp;VarName) {
-  IRBuilder&lt;&gt; TmpB(&amp;TheFunction-&gt;getEntryBlock(),
-                 TheFunction-&gt;getEntryBlock().begin());
-  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
-                           VarName.c_str());
-}
-</pre>
-</div>
-
-<p>This funny looking code creates an IRBuilder object that is pointing at
-the first instruction (.begin()) of the entry block.  It then creates an alloca
-with the expected name and returns it.  Because all values in Kaleidoscope are
-doubles, there is no need to pass in a type to use.</p>
-
-<p>With this in place, the first functionality change we want to make is to
-variable references.  In our new scheme, variables live on the stack, so code
-generating a reference to them actually needs to produce a load from the stack
-slot:</p>
-
-<div class="doc_code">
-<pre>
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  if (V == 0) return ErrorV("Unknown variable name");
-
-  <b>// Load the value.
-  return Builder.CreateLoad(V, Name.c_str());</b>
-}
-</pre>
-</div>
-
-<p>As you can see, this is pretty straightforward.  Now we need to update the
-things that define the variables to set up the alloca.  We'll start with 
-<tt>ForExprAST::Codegen</tt> (see the <a href="#code">full code listing</a> for
-the unabridged code):</p>
-
-<div class="doc_code">
-<pre>
-  Function *TheFunction = Builder.GetInsertBlock()->getParent();
-
-  <b>// Create an alloca for the variable in the entry block.
-  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);</b>
-  
-    // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start-&gt;Codegen();
-  if (StartVal == 0) return 0;
-  
-  <b>// Store the value into the alloca.
-  Builder.CreateStore(StartVal, Alloca);</b>
-  ...
-
-  // Compute the end condition.
-  Value *EndCond = End-&gt;Codegen();
-  if (EndCond == 0) return EndCond;
-  
-  <b>// Reload, increment, and restore the alloca.  This handles the case where
-  // the body of the loop mutates the variable.
-  Value *CurVar = Builder.CreateLoad(Alloca);
-  Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
-  Builder.CreateStore(NextVar, Alloca);</b>
-  ...
-</pre>
-</div>
-
-<p>This code is virtually identical to the code <a 
-href="LangImpl5.html#forcodegen">before we allowed mutable variables</a>.  The
-big difference is that we no longer have to construct a PHI node, and we use
-load/store to access the variable as needed.</p>
-
-<p>To support mutable argument variables, we need to also make allocas for them.
-The code for this is also pretty simple:</p>
-
-<div class="doc_code">
-<pre>
-/// CreateArgumentAllocas - Create an alloca for each argument and register the
-/// argument in the symbol table so that references to it will succeed.
-void PrototypeAST::CreateArgumentAllocas(Function *F) {
-  Function::arg_iterator AI = F-&gt;arg_begin();
-  for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
-    // Create an alloca for this variable.
-    AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
-
-    // Store the initial value into the alloca.
-    Builder.CreateStore(AI, Alloca);
-
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = Alloca;
-  }
-}
-</pre>
-</div>
-
-<p>For each argument, we make an alloca, store the input value to the function
-into the alloca, and register the alloca as the memory location for the
-argument.  This method gets invoked by <tt>FunctionAST::Codegen</tt> right after
-it sets up the entry block for the function.</p>
-
-<p>The final missing piece is adding the mem2reg pass, which allows us to get
-good codegen once again:</p>
-
-<div class="doc_code">
-<pre>
-    // Set up the optimizer pipeline.  Start with registering info about how the
-    // target lays out data structures.
-    OurFPM.add(new DataLayout(*TheExecutionEngine-&gt;getDataLayout()));
-    <b>// Promote allocas to registers.
-    OurFPM.add(createPromoteMemoryToRegisterPass());</b>
-    // Do simple "peephole" optimizations and bit-twiddling optzns.
-    OurFPM.add(createInstructionCombiningPass());
-    // Reassociate expressions.
-    OurFPM.add(createReassociatePass());
-</pre>
-</div>
-
-<p>It is interesting to see what the code looks like before and after the
-mem2reg optimization runs.  For example, this is the before/after code for our
-recursive fib function.  Before the optimization:</p>
-
-<div class="doc_code">
-<pre>
-define double @fib(double %x) {
-entry:
-  <b>%x1 = alloca double
-  store double %x, double* %x1
-  %x2 = load double* %x1</b>
-  %cmptmp = fcmp ult double %x2, 3.000000e+00
-  %booltmp = uitofp i1 %cmptmp to double
-  %ifcond = fcmp one double %booltmp, 0.000000e+00
-  br i1 %ifcond, label %then, label %else
-
-then:		; preds = %entry
-  br label %ifcont
-
-else:		; preds = %entry
-  <b>%x3 = load double* %x1</b>
-  %subtmp = fsub double %x3, 1.000000e+00
-  %calltmp = call double @fib(double %subtmp)
-  <b>%x4 = load double* %x1</b>
-  %subtmp5 = fsub double %x4, 2.000000e+00
-  %calltmp6 = call double @fib(double %subtmp5)
-  %addtmp = fadd double %calltmp, %calltmp6
-  br label %ifcont
-
-ifcont:		; preds = %else, %then
-  %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
-  ret double %iftmp
-}
-</pre>
-</div>
-
-<p>Here there is only one variable (x, the input argument) but you can still
-see the extremely simple-minded code generation strategy we are using.  In the
-entry block, an alloca is created, and the initial input value is stored into
-it.  Each reference to the variable does a reload from the stack.  Also, note
-that we didn't modify the if/then/else expression, so it still inserts a PHI
-node.  While we could make an alloca for it, it is actually easier to create a 
-PHI node for it, so we still just make the PHI.</p>
-
-<p>Here is the code after the mem2reg pass runs:</p>
-
-<div class="doc_code">
-<pre>
-define double @fib(double %x) {
-entry:
-  %cmptmp = fcmp ult double <b>%x</b>, 3.000000e+00
-  %booltmp = uitofp i1 %cmptmp to double
-  %ifcond = fcmp one double %booltmp, 0.000000e+00
-  br i1 %ifcond, label %then, label %else
-
-then:
-  br label %ifcont
-
-else:
-  %subtmp = fsub double <b>%x</b>, 1.000000e+00
-  %calltmp = call double @fib(double %subtmp)
-  %subtmp5 = fsub double <b>%x</b>, 2.000000e+00
-  %calltmp6 = call double @fib(double %subtmp5)
-  %addtmp = fadd double %calltmp, %calltmp6
-  br label %ifcont
-
-ifcont:		; preds = %else, %then
-  %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
-  ret double %iftmp
-}
-</pre>
-</div>
-
-<p>This is a trivial case for mem2reg, since there are no redefinitions of the
-variable.  The point of showing this is to calm your tension about inserting
-such blatent inefficiencies :).</p>
-
-<p>After the rest of the optimizers run, we get:</p>
-
-<div class="doc_code">
-<pre>
-define double @fib(double %x) {
-entry:
-  %cmptmp = fcmp ult double %x, 3.000000e+00
-  %booltmp = uitofp i1 %cmptmp to double
-  %ifcond = fcmp ueq double %booltmp, 0.000000e+00
-  br i1 %ifcond, label %else, label %ifcont
-
-else:
-  %subtmp = fsub double %x, 1.000000e+00
-  %calltmp = call double @fib(double %subtmp)
-  %subtmp5 = fsub double %x, 2.000000e+00
-  %calltmp6 = call double @fib(double %subtmp5)
-  %addtmp = fadd double %calltmp, %calltmp6
-  ret double %addtmp
-
-ifcont:
-  ret double 1.000000e+00
-}
-</pre>
-</div>
-
-<p>Here we see that the simplifycfg pass decided to clone the return instruction
-into the end of the 'else' block.  This allowed it to eliminate some branches
-and the PHI node.</p>
-
-<p>Now that all symbol table references are updated to use stack variables, 
-we'll add the assignment operator.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="assignment">New Assignment Operator</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>With our current framework, adding a new assignment operator is really
-simple.  We will parse it just like any other binary operator, but handle it
-internally (instead of allowing the user to define it).  The first step is to
-set a precedence:</p>
-
-<div class="doc_code">
-<pre>
- int main() {
-   // Install standard binary operators.
-   // 1 is lowest precedence.
-   <b>BinopPrecedence['='] = 2;</b>
-   BinopPrecedence['&lt;'] = 10;
-   BinopPrecedence['+'] = 20;
-   BinopPrecedence['-'] = 20;
-</pre>
-</div>
-
-<p>Now that the parser knows the precedence of the binary operator, it takes
-care of all the parsing and AST generation.  We just need to implement codegen
-for the assignment operator.  This looks like:</p> 
-
-<div class="doc_code">
-<pre>
-Value *BinaryExprAST::Codegen() {
-  // Special case '=' because we don't want to emit the LHS as an expression.
-  if (Op == '=') {
-    // Assignment requires the LHS to be an identifier.
-    VariableExprAST *LHSE = dynamic_cast&lt;VariableExprAST*&gt;(LHS);
-    if (!LHSE)
-      return ErrorV("destination of '=' must be a variable");
-</pre>
-</div>
-
-<p>Unlike the rest of the binary operators, our assignment operator doesn't
-follow the "emit LHS, emit RHS, do computation" model.  As such, it is handled
-as a special case before the other binary operators are handled.  The other 
-strange thing is that it requires the LHS to be a variable.  It is invalid to
-have "(x+1) = expr" - only things like "x = expr" are allowed.
-</p>
-
-<div class="doc_code">
-<pre>
-    // Codegen the RHS.
-    Value *Val = RHS-&gt;Codegen();
-    if (Val == 0) return 0;
-
-    // Look up the name.
-    Value *Variable = NamedValues[LHSE-&gt;getName()];
-    if (Variable == 0) return ErrorV("Unknown variable name");
-
-    Builder.CreateStore(Val, Variable);
-    return Val;
-  }
-  ...  
-</pre>
-</div>
-
-<p>Once we have the variable, codegen'ing the assignment is straightforward:
-we emit the RHS of the assignment, create a store, and return the computed
-value.  Returning a value allows for chained assignments like "X = (Y = Z)".</p>
-
-<p>Now that we have an assignment operator, we can mutate loop variables and
-arguments.  For example, we can now run code like this:</p>
-
-<div class="doc_code">
-<pre>
-# Function to print a double.
-extern printd(x);
-
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-
-def test(x)
-  printd(x) :
-  x = 4 :
-  printd(x);
-
-test(123);
-</pre>
-</div>
-
-<p>When run, this example prints "123" and then "4", showing that we did
-actually mutate the value!  Okay, we have now officially implemented our goal:
-getting this to work requires SSA construction in the general case.  However,
-to be really useful, we want the ability to define our own local variables, lets
-add this next! 
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="localvars">User-defined Local Variables</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Adding var/in is just like any other other extensions we made to 
-Kaleidoscope: we extend the lexer, the parser, the AST and the code generator.
-The first step for adding our new 'var/in' construct is to extend the lexer.
-As before, this is pretty trivial, the code looks like this:</p>
-
-<div class="doc_code">
-<pre>
-enum Token {
-  ...
-  <b>// var definition
-  tok_var = -13</b>
-...
-}
-...
-static int gettok() {
-...
-    if (IdentifierStr == "in") return tok_in;
-    if (IdentifierStr == "binary") return tok_binary;
-    if (IdentifierStr == "unary") return tok_unary;
-    <b>if (IdentifierStr == "var") return tok_var;</b>
-    return tok_identifier;
-...
-</pre>
-</div>
-
-<p>The next step is to define the AST node that we will construct.  For var/in,
-it looks like this:</p>
-
-<div class="doc_code">
-<pre>
-/// VarExprAST - Expression class for var/in
-class VarExprAST : public ExprAST {
-  std::vector&lt;std::pair&lt;std::string, ExprAST*&gt; &gt; VarNames;
-  ExprAST *Body;
-public:
-  VarExprAST(const std::vector&lt;std::pair&lt;std::string, ExprAST*&gt; &gt; &amp;varnames,
-             ExprAST *body)
-  : VarNames(varnames), Body(body) {}
-  
-  virtual Value *Codegen();
-};
-</pre>
-</div>
-
-<p>var/in allows a list of names to be defined all at once, and each name can
-optionally have an initializer value.  As such, we capture this information in
-the VarNames vector.  Also, var/in has a body, this body is allowed to access
-the variables defined by the var/in.</p>
-
-<p>With this in place, we can define the parser pieces.  The first thing we do is add
-it as a primary expression:</p>
-
-<div class="doc_code">
-<pre>
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-///   ::= ifexpr
-///   ::= forexpr
-<b>///   ::= varexpr</b>
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  case tok_if:         return ParseIfExpr();
-  case tok_for:        return ParseForExpr();
-  <b>case tok_var:        return ParseVarExpr();</b>
-  }
-}
-</pre>
-</div>
-
-<p>Next we define ParseVarExpr:</p>
-
-<div class="doc_code">
-<pre>
-/// varexpr ::= 'var' identifier ('=' expression)? 
-//                    (',' identifier ('=' expression)?)* 'in' expression
-static ExprAST *ParseVarExpr() {
-  getNextToken();  // eat the var.
-
-  std::vector&lt;std::pair&lt;std::string, ExprAST*&gt; &gt; VarNames;
-
-  // At least one variable name is required.
-  if (CurTok != tok_identifier)
-    return Error("expected identifier after var");
-</pre>
-</div>
-
-<p>The first part of this code parses the list of identifier/expr pairs into the
-local <tt>VarNames</tt> vector.  
-
-<div class="doc_code">
-<pre>
-  while (1) {
-    std::string Name = IdentifierStr;
-    getNextToken();  // eat identifier.
-
-    // Read the optional initializer.
-    ExprAST *Init = 0;
-    if (CurTok == '=') {
-      getNextToken(); // eat the '='.
-      
-      Init = ParseExpression();
-      if (Init == 0) return 0;
-    }
-    
-    VarNames.push_back(std::make_pair(Name, Init));
-    
-    // End of var list, exit loop.
-    if (CurTok != ',') break;
-    getNextToken(); // eat the ','.
-    
-    if (CurTok != tok_identifier)
-      return Error("expected identifier list after var");
-  }
-</pre>
-</div>
-
-<p>Once all the variables are parsed, we then parse the body and create the
-AST node:</p>
-
-<div class="doc_code">
-<pre>
-  // At this point, we have to have 'in'.
-  if (CurTok != tok_in)
-    return Error("expected 'in' keyword after 'var'");
-  getNextToken();  // eat 'in'.
-  
-  ExprAST *Body = ParseExpression();
-  if (Body == 0) return 0;
-  
-  return new VarExprAST(VarNames, Body);
-}
-</pre>
-</div>
-
-<p>Now that we can parse and represent the code, we need to support emission of
-LLVM IR for it.  This code starts out with:</p>
-
-<div class="doc_code">
-<pre>
-Value *VarExprAST::Codegen() {
-  std::vector&lt;AllocaInst *&gt; OldBindings;
-  
-  Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-
-  // Register all variables and emit their initializer.
-  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
-    const std::string &amp;VarName = VarNames[i].first;
-    ExprAST *Init = VarNames[i].second;
-</pre>
-</div>
-
-<p>Basically it loops over all the variables, installing them one at a time.
-For each variable we put into the symbol table, we remember the previous value
-that we replace in OldBindings.</p>
-
-<div class="doc_code">
-<pre>
-    // Emit the initializer before adding the variable to scope, this prevents
-    // the initializer from referencing the variable itself, and permits stuff
-    // like this:
-    //  var a = 1 in
-    //    var a = a in ...   # refers to outer 'a'.
-    Value *InitVal;
-    if (Init) {
-      InitVal = Init-&gt;Codegen();
-      if (InitVal == 0) return 0;
-    } else { // If not specified, use 0.0.
-      InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
-    }
-    
-    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
-    Builder.CreateStore(InitVal, Alloca);
-
-    // Remember the old variable binding so that we can restore the binding when
-    // we unrecurse.
-    OldBindings.push_back(NamedValues[VarName]);
-    
-    // Remember this binding.
-    NamedValues[VarName] = Alloca;
-  }
-</pre>
-</div>
-
-<p>There are more comments here than code.  The basic idea is that we emit the
-initializer, create the alloca, then update the symbol table to point to it.
-Once all the variables are installed in the symbol table, we evaluate the body
-of the var/in expression:</p>
-
-<div class="doc_code">
-<pre>
-  // Codegen the body, now that all vars are in scope.
-  Value *BodyVal = Body-&gt;Codegen();
-  if (BodyVal == 0) return 0;
-</pre>
-</div>
-
-<p>Finally, before returning, we restore the previous variable bindings:</p>
-
-<div class="doc_code">
-<pre>
-  // Pop all our variables from scope.
-  for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
-    NamedValues[VarNames[i].first] = OldBindings[i];
-
-  // Return the body computation.
-  return BodyVal;
-}
-</pre>
-</div>
-
-<p>The end result of all of this is that we get properly scoped variable 
-definitions, and we even (trivially) allow mutation of them :).</p>
-
-<p>With this, we completed what we set out to do.  Our nice iterative fib
-example from the intro compiles and runs just fine.  The mem2reg pass optimizes
-all of our stack variables into SSA registers, inserting PHI nodes where needed,
-and our front-end remains simple: no "iterated dominance frontier" computation
-anywhere in sight.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with mutable
-variables and var/in support.  To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
-# Run
-./toy
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<div class="doc_code">
-<pre>
-#include "llvm/DerivedTypes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/TargetSelect.h"
-#include &lt;cstdio&gt;
-#include &lt;string&gt;
-#include &lt;map&gt;
-#include &lt;vector&gt;
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// Lexer
-//===----------------------------------------------------------------------===//
-
-// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
-// of these for known things.
-enum Token {
-  tok_eof = -1,
-
-  // commands
-  tok_def = -2, tok_extern = -3,
-
-  // primary
-  tok_identifier = -4, tok_number = -5,
-  
-  // control
-  tok_if = -6, tok_then = -7, tok_else = -8,
-  tok_for = -9, tok_in = -10,
-  
-  // operators
-  tok_binary = -11, tok_unary = -12,
-  
-  // var definition
-  tok_var = -13
-};
-
-static std::string IdentifierStr;  // Filled in if tok_identifier
-static double NumVal;              // Filled in if tok_number
-
-/// gettok - Return the next token from standard input.
-static int gettok() {
-  static int LastChar = ' ';
-
-  // Skip any whitespace.
-  while (isspace(LastChar))
-    LastChar = getchar();
-
-  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
-    IdentifierStr = LastChar;
-    while (isalnum((LastChar = getchar())))
-      IdentifierStr += LastChar;
-
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    if (IdentifierStr == "if") return tok_if;
-    if (IdentifierStr == "then") return tok_then;
-    if (IdentifierStr == "else") return tok_else;
-    if (IdentifierStr == "for") return tok_for;
-    if (IdentifierStr == "in") return tok_in;
-    if (IdentifierStr == "binary") return tok_binary;
-    if (IdentifierStr == "unary") return tok_unary;
-    if (IdentifierStr == "var") return tok_var;
-    return tok_identifier;
-  }
-
-  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
-    std::string NumStr;
-    do {
-      NumStr += LastChar;
-      LastChar = getchar();
-    } while (isdigit(LastChar) || LastChar == '.');
-
-    NumVal = strtod(NumStr.c_str(), 0);
-    return tok_number;
-  }
-
-  if (LastChar == '#') {
-    // Comment until end of line.
-    do LastChar = getchar();
-    while (LastChar != EOF &amp;&amp; LastChar != '\n' &amp;&amp; LastChar != '\r');
-    
-    if (LastChar != EOF)
-      return gettok();
-  }
-  
-  // Check for end of file.  Don't eat the EOF.
-  if (LastChar == EOF)
-    return tok_eof;
-
-  // Otherwise, just return the character as its ascii value.
-  int ThisChar = LastChar;
-  LastChar = getchar();
-  return ThisChar;
-}
-
-//===----------------------------------------------------------------------===//
-// Abstract Syntax Tree (aka Parse Tree)
-//===----------------------------------------------------------------------===//
-
-/// ExprAST - Base class for all expression nodes.
-class ExprAST {
-public:
-  virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
-};
-
-/// NumberExprAST - Expression class for numeric literals like "1.0".
-class NumberExprAST : public ExprAST {
-  double Val;
-public:
-  NumberExprAST(double val) : Val(val) {}
-  virtual Value *Codegen();
-};
-
-/// VariableExprAST - Expression class for referencing a variable, like "a".
-class VariableExprAST : public ExprAST {
-  std::string Name;
-public:
-  VariableExprAST(const std::string &amp;name) : Name(name) {}
-  const std::string &amp;getName() const { return Name; }
-  virtual Value *Codegen();
-};
-
-/// UnaryExprAST - Expression class for a unary operator.
-class UnaryExprAST : public ExprAST {
-  char Opcode;
-  ExprAST *Operand;
-public:
-  UnaryExprAST(char opcode, ExprAST *operand) 
-    : Opcode(opcode), Operand(operand) {}
-  virtual Value *Codegen();
-};
-
-/// BinaryExprAST - Expression class for a binary operator.
-class BinaryExprAST : public ExprAST {
-  char Op;
-  ExprAST *LHS, *RHS;
-public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
-    : Op(op), LHS(lhs), RHS(rhs) {}
-  virtual Value *Codegen();
-};
-
-/// CallExprAST - Expression class for function calls.
-class CallExprAST : public ExprAST {
-  std::string Callee;
-  std::vector&lt;ExprAST*&gt; Args;
-public:
-  CallExprAST(const std::string &amp;callee, std::vector&lt;ExprAST*&gt; &amp;args)
-    : Callee(callee), Args(args) {}
-  virtual Value *Codegen();
-};
-
-/// IfExprAST - Expression class for if/then/else.
-class IfExprAST : public ExprAST {
-  ExprAST *Cond, *Then, *Else;
-public:
-  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
-  : Cond(cond), Then(then), Else(_else) {}
-  virtual Value *Codegen();
-};
-
-/// ForExprAST - Expression class for for/in.
-class ForExprAST : public ExprAST {
-  std::string VarName;
-  ExprAST *Start, *End, *Step, *Body;
-public:
-  ForExprAST(const std::string &amp;varname, ExprAST *start, ExprAST *end,
-             ExprAST *step, ExprAST *body)
-    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
-  virtual Value *Codegen();
-};
-
-/// VarExprAST - Expression class for var/in
-class VarExprAST : public ExprAST {
-  std::vector&lt;std::pair&lt;std::string, ExprAST*&gt; &gt; VarNames;
-  ExprAST *Body;
-public:
-  VarExprAST(const std::vector&lt;std::pair&lt;std::string, ExprAST*&gt; &gt; &amp;varnames,
-             ExprAST *body)
-  : VarNames(varnames), Body(body) {}
-  
-  virtual Value *Codegen();
-};
-
-/// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its name, and its argument names (thus implicitly the number
-/// of arguments the function takes), as well as if it is an operator.
-class PrototypeAST {
-  std::string Name;
-  std::vector&lt;std::string&gt; Args;
-  bool isOperator;
-  unsigned Precedence;  // Precedence if a binary op.
-public:
-  PrototypeAST(const std::string &amp;name, const std::vector&lt;std::string&gt; &amp;args,
-               bool isoperator = false, unsigned prec = 0)
-  : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
-  
-  bool isUnaryOp() const { return isOperator &amp;&amp; Args.size() == 1; }
-  bool isBinaryOp() const { return isOperator &amp;&amp; Args.size() == 2; }
-  
-  char getOperatorName() const {
-    assert(isUnaryOp() || isBinaryOp());
-    return Name[Name.size()-1];
-  }
-  
-  unsigned getBinaryPrecedence() const { return Precedence; }
-  
-  Function *Codegen();
-  
-  void CreateArgumentAllocas(Function *F);
-};
-
-/// FunctionAST - This class represents a function definition itself.
-class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
-public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body)
-    : Proto(proto), Body(body) {}
-  
-  Function *Codegen();
-};
-
-//===----------------------------------------------------------------------===//
-// Parser
-//===----------------------------------------------------------------------===//
-
-/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
-/// token the parser is looking at.  getNextToken reads another token from the
-/// lexer and updates CurTok with its results.
-static int CurTok;
-static int getNextToken() {
-  return CurTok = gettok();
-}
-
-/// BinopPrecedence - This holds the precedence for each binary operator that is
-/// defined.
-static std::map&lt;char, int&gt; BinopPrecedence;
-
-/// GetTokPrecedence - Get the precedence of the pending binary operator token.
-static int GetTokPrecedence() {
-  if (!isascii(CurTok))
-    return -1;
-  
-  // Make sure it's a declared binop.
-  int TokPrec = BinopPrecedence[CurTok];
-  if (TokPrec &lt;= 0) return -1;
-  return TokPrec;
-}
-
-/// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
-PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
-FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
-
-static ExprAST *ParseExpression();
-
-/// identifierexpr
-///   ::= identifier
-///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
-  std::string IdName = IdentifierStr;
-  
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
-  
-  // Call.
-  getNextToken();  // eat (
-  std::vector&lt;ExprAST*&gt; Args;
-  if (CurTok != ')') {
-    while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg) return 0;
-      Args.push_back(Arg);
-
-      if (CurTok == ')') break;
-
-      if (CurTok != ',')
-        return Error("Expected ')' or ',' in argument list");
-      getNextToken();
-    }
-  }
-
-  // Eat the ')'.
-  getNextToken();
-  
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken();  // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V) return 0;
-  
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken();  // eat ).
-  return V;
-}
-
-/// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {
-  getNextToken();  // eat the if.
-  
-  // condition.
-  ExprAST *Cond = ParseExpression();
-  if (!Cond) return 0;
-  
-  if (CurTok != tok_then)
-    return Error("expected then");
-  getNextToken();  // eat the then
-  
-  ExprAST *Then = ParseExpression();
-  if (Then == 0) return 0;
-  
-  if (CurTok != tok_else)
-    return Error("expected else");
-  
-  getNextToken();
-  
-  ExprAST *Else = ParseExpression();
-  if (!Else) return 0;
-  
-  return new IfExprAST(Cond, Then, Else);
-}
-
-/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {
-  getNextToken();  // eat the for.
-
-  if (CurTok != tok_identifier)
-    return Error("expected identifier after for");
-  
-  std::string IdName = IdentifierStr;
-  getNextToken();  // eat identifier.
-  
-  if (CurTok != '=')
-    return Error("expected '=' after for");
-  getNextToken();  // eat '='.
-  
-  
-  ExprAST *Start = ParseExpression();
-  if (Start == 0) return 0;
-  if (CurTok != ',')
-    return Error("expected ',' after for start value");
-  getNextToken();
-  
-  ExprAST *End = ParseExpression();
-  if (End == 0) return 0;
-  
-  // The step value is optional.
-  ExprAST *Step = 0;
-  if (CurTok == ',') {
-    getNextToken();
-    Step = ParseExpression();
-    if (Step == 0) return 0;
-  }
-  
-  if (CurTok != tok_in)
-    return Error("expected 'in' after for");
-  getNextToken();  // eat 'in'.
-  
-  ExprAST *Body = ParseExpression();
-  if (Body == 0) return 0;
-
-  return new ForExprAST(IdName, Start, End, Step, Body);
-}
-
-/// varexpr ::= 'var' identifier ('=' expression)? 
-//                    (',' identifier ('=' expression)?)* 'in' expression
-static ExprAST *ParseVarExpr() {
-  getNextToken();  // eat the var.
-
-  std::vector&lt;std::pair&lt;std::string, ExprAST*&gt; &gt; VarNames;
-
-  // At least one variable name is required.
-  if (CurTok != tok_identifier)
-    return Error("expected identifier after var");
-  
-  while (1) {
-    std::string Name = IdentifierStr;
-    getNextToken();  // eat identifier.
-
-    // Read the optional initializer.
-    ExprAST *Init = 0;
-    if (CurTok == '=') {
-      getNextToken(); // eat the '='.
-      
-      Init = ParseExpression();
-      if (Init == 0) return 0;
-    }
-    
-    VarNames.push_back(std::make_pair(Name, Init));
-    
-    // End of var list, exit loop.
-    if (CurTok != ',') break;
-    getNextToken(); // eat the ','.
-    
-    if (CurTok != tok_identifier)
-      return Error("expected identifier list after var");
-  }
-  
-  // At this point, we have to have 'in'.
-  if (CurTok != tok_in)
-    return Error("expected 'in' keyword after 'var'");
-  getNextToken();  // eat 'in'.
-  
-  ExprAST *Body = ParseExpression();
-  if (Body == 0) return 0;
-  
-  return new VarExprAST(VarNames, Body);
-}
-
-/// primary
-///   ::= identifierexpr
-///   ::= numberexpr
-///   ::= parenexpr
-///   ::= ifexpr
-///   ::= forexpr
-///   ::= varexpr
-static ExprAST *ParsePrimary() {
-  switch (CurTok) {
-  default: return Error("unknown token when expecting an expression");
-  case tok_identifier: return ParseIdentifierExpr();
-  case tok_number:     return ParseNumberExpr();
-  case '(':            return ParseParenExpr();
-  case tok_if:         return ParseIfExpr();
-  case tok_for:        return ParseForExpr();
-  case tok_var:        return ParseVarExpr();
-  }
-}
-
-/// unary
-///   ::= primary
-///   ::= '!' unary
-static ExprAST *ParseUnary() {
-  // If the current token is not an operator, it must be a primary expr.
-  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
-    return ParsePrimary();
-  
-  // If this is a unary operator, read it.
-  int Opc = CurTok;
-  getNextToken();
-  if (ExprAST *Operand = ParseUnary())
-    return new UnaryExprAST(Opc, Operand);
-  return 0;
-}
-
-/// binoprhs
-///   ::= ('+' unary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
-  // If this is a binop, find its precedence.
-  while (1) {
-    int TokPrec = GetTokPrecedence();
-    
-    // If this is a binop that binds at least as tightly as the current binop,
-    // consume it, otherwise we are done.
-    if (TokPrec &lt; ExprPrec)
-      return LHS;
-    
-    // Okay, we know this is a binop.
-    int BinOp = CurTok;
-    getNextToken();  // eat binop
-    
-    // Parse the unary expression after the binary operator.
-    ExprAST *RHS = ParseUnary();
-    if (!RHS) return 0;
-    
-    // If BinOp binds less tightly with RHS than the operator after RHS, let
-    // the pending operator take RHS as its LHS.
-    int NextPrec = GetTokPrecedence();
-    if (TokPrec &lt; NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec+1, RHS);
-      if (RHS == 0) return 0;
-    }
-    
-    // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
-  }
-}
-
-/// expression
-///   ::= unary binoprhs
-///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParseUnary();
-  if (!LHS) return 0;
-  
-  return ParseBinOpRHS(0, LHS);
-}
-
-/// prototype
-///   ::= id '(' id* ')'
-///   ::= binary LETTER number? (id, id)
-///   ::= unary LETTER (id)
-static PrototypeAST *ParsePrototype() {
-  std::string FnName;
-  
-  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
-  unsigned BinaryPrecedence = 30;
-  
-  switch (CurTok) {
-  default:
-    return ErrorP("Expected function name in prototype");
-  case tok_identifier:
-    FnName = IdentifierStr;
-    Kind = 0;
-    getNextToken();
-    break;
-  case tok_unary:
-    getNextToken();
-    if (!isascii(CurTok))
-      return ErrorP("Expected unary operator");
-    FnName = "unary";
-    FnName += (char)CurTok;
-    Kind = 1;
-    getNextToken();
-    break;
-  case tok_binary:
-    getNextToken();
-    if (!isascii(CurTok))
-      return ErrorP("Expected binary operator");
-    FnName = "binary";
-    FnName += (char)CurTok;
-    Kind = 2;
-    getNextToken();
-    
-    // Read the precedence if present.
-    if (CurTok == tok_number) {
-      if (NumVal &lt; 1 || NumVal &gt; 100)
-        return ErrorP("Invalid precedecnce: must be 1..100");
-      BinaryPrecedence = (unsigned)NumVal;
-      getNextToken();
-    }
-    break;
-  }
-  
-  if (CurTok != '(')
-    return ErrorP("Expected '(' in prototype");
-  
-  std::vector&lt;std::string&gt; ArgNames;
-  while (getNextToken() == tok_identifier)
-    ArgNames.push_back(IdentifierStr);
-  if (CurTok != ')')
-    return ErrorP("Expected ')' in prototype");
-  
-  // success.
-  getNextToken();  // eat ')'.
-  
-  // Verify right number of names for operator.
-  if (Kind &amp;&amp; ArgNames.size() != Kind)
-    return ErrorP("Invalid number of operands for operator");
-  
-  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
-}
-
-/// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
-  getNextToken();  // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0) return 0;
-
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
-}
-
-/// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
-    // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector&lt;std::string&gt;());
-    return new FunctionAST(Proto, E);
-  }
-  return 0;
-}
-
-/// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
-  getNextToken();  // eat extern.
-  return ParsePrototype();
-}
-
-//===----------------------------------------------------------------------===//
-// Code Generation
-//===----------------------------------------------------------------------===//
-
-static Module *TheModule;
-static IRBuilder&lt;&gt; Builder(getGlobalContext());
-static std::map&lt;std::string, AllocaInst*&gt; NamedValues;
-static FunctionPassManager *TheFPM;
-
-Value *ErrorV(const char *Str) { Error(Str); return 0; }
-
-/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
-/// the function.  This is used for mutable variables etc.
-static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
-                                          const std::string &amp;VarName) {
-  IRBuilder&lt;&gt; TmpB(&amp;TheFunction-&gt;getEntryBlock(),
-                 TheFunction-&gt;getEntryBlock().begin());
-  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
-                           VarName.c_str());
-}
-
-Value *NumberExprAST::Codegen() {
-  return ConstantFP::get(getGlobalContext(), APFloat(Val));
-}
-
-Value *VariableExprAST::Codegen() {
-  // Look this variable up in the function.
-  Value *V = NamedValues[Name];
-  if (V == 0) return ErrorV("Unknown variable name");
-
-  // Load the value.
-  return Builder.CreateLoad(V, Name.c_str());
-}
-
-Value *UnaryExprAST::Codegen() {
-  Value *OperandV = Operand-&gt;Codegen();
-  if (OperandV == 0) return 0;
-  
-  Function *F = TheModule-&gt;getFunction(std::string("unary")+Opcode);
-  if (F == 0)
-    return ErrorV("Unknown unary operator");
-  
-  return Builder.CreateCall(F, OperandV, "unop");
-}
-
-Value *BinaryExprAST::Codegen() {
-  // Special case '=' because we don't want to emit the LHS as an expression.
-  if (Op == '=') {
-    // Assignment requires the LHS to be an identifier.
-    VariableExprAST *LHSE = dynamic_cast&lt;VariableExprAST*&gt;(LHS);
-    if (!LHSE)
-      return ErrorV("destination of '=' must be a variable");
-    // Codegen the RHS.
-    Value *Val = RHS-&gt;Codegen();
-    if (Val == 0) return 0;
-
-    // Look up the name.
-    Value *Variable = NamedValues[LHSE-&gt;getName()];
-    if (Variable == 0) return ErrorV("Unknown variable name");
-
-    Builder.CreateStore(Val, Variable);
-    return Val;
-  }
-  
-  Value *L = LHS-&gt;Codegen();
-  Value *R = RHS-&gt;Codegen();
-  if (L == 0 || R == 0) return 0;
-  
-  switch (Op) {
-  case '+': return Builder.CreateFAdd(L, R, "addtmp");
-  case '-': return Builder.CreateFSub(L, R, "subtmp");
-  case '*': return Builder.CreateFMul(L, R, "multmp");
-  case '&lt;':
-    L = Builder.CreateFCmpULT(L, R, "cmptmp");
-    // Convert bool 0/1 to double 0.0 or 1.0
-    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
-                                "booltmp");
-  default: break;
-  }
-  
-  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
-  // a call to it.
-  Function *F = TheModule-&gt;getFunction(std::string("binary")+Op);
-  assert(F &amp;&amp; "binary operator not found!");
-  
-  Value *Ops[2] = { L, R };
-  return Builder.CreateCall(F, Ops, "binop");
-}
-
-Value *CallExprAST::Codegen() {
-  // Look up the name in the global module table.
-  Function *CalleeF = TheModule-&gt;getFunction(Callee);
-  if (CalleeF == 0)
-    return ErrorV("Unknown function referenced");
-  
-  // If argument mismatch error.
-  if (CalleeF-&gt;arg_size() != Args.size())
-    return ErrorV("Incorrect # arguments passed");
-
-  std::vector&lt;Value*&gt; ArgsV;
-  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]-&gt;Codegen());
-    if (ArgsV.back() == 0) return 0;
-  }
-  
-  return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
-}
-
-Value *IfExprAST::Codegen() {
-  Value *CondV = Cond-&gt;Codegen();
-  if (CondV == 0) return 0;
-  
-  // Convert condition to a bool by comparing equal to 0.0.
-  CondV = Builder.CreateFCmpONE(CondV, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                "ifcond");
-  
-  Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-  
-  // Create blocks for the then and else cases.  Insert the 'then' block at the
-  // end of the function.
-  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
-  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
-  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
-  
-  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
-  
-  // Emit then value.
-  Builder.SetInsertPoint(ThenBB);
-  
-  Value *ThenV = Then-&gt;Codegen();
-  if (ThenV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
-  ThenBB = Builder.GetInsertBlock();
-  
-  // Emit else block.
-  TheFunction-&gt;getBasicBlockList().push_back(ElseBB);
-  Builder.SetInsertPoint(ElseBB);
-  
-  Value *ElseV = Else-&gt;Codegen();
-  if (ElseV == 0) return 0;
-  
-  Builder.CreateBr(MergeBB);
-  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
-  ElseBB = Builder.GetInsertBlock();
-  
-  // Emit merge block.
-  TheFunction-&gt;getBasicBlockList().push_back(MergeBB);
-  Builder.SetInsertPoint(MergeBB);
-  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
-                                  "iftmp");
-  
-  PN-&gt;addIncoming(ThenV, ThenBB);
-  PN-&gt;addIncoming(ElseV, ElseBB);
-  return PN;
-}
-
-Value *ForExprAST::Codegen() {
-  // Output this as:
-  //   var = alloca double
-  //   ...
-  //   start = startexpr
-  //   store start -&gt; var
-  //   goto loop
-  // loop: 
-  //   ...
-  //   bodyexpr
-  //   ...
-  // loopend:
-  //   step = stepexpr
-  //   endcond = endexpr
-  //
-  //   curvar = load var
-  //   nextvar = curvar + step
-  //   store nextvar -&gt; var
-  //   br endcond, loop, endloop
-  // outloop:
-  
-  Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-
-  // Create an alloca for the variable in the entry block.
-  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
-  
-  // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start-&gt;Codegen();
-  if (StartVal == 0) return 0;
-  
-  // Store the value into the alloca.
-  Builder.CreateStore(StartVal, Alloca);
-  
-  // Make the new basic block for the loop header, inserting after current
-  // block.
-  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
-  
-  // Insert an explicit fall through from the current block to the LoopBB.
-  Builder.CreateBr(LoopBB);
-
-  // Start insertion in LoopBB.
-  Builder.SetInsertPoint(LoopBB);
-  
-  // Within the loop, the variable is defined equal to the PHI node.  If it
-  // shadows an existing variable, we have to restore it, so save it now.
-  AllocaInst *OldVal = NamedValues[VarName];
-  NamedValues[VarName] = Alloca;
-  
-  // Emit the body of the loop.  This, like any other expr, can change the
-  // current BB.  Note that we ignore the value computed by the body, but don't
-  // allow an error.
-  if (Body-&gt;Codegen() == 0)
-    return 0;
-  
-  // Emit the step value.
-  Value *StepVal;
-  if (Step) {
-    StepVal = Step-&gt;Codegen();
-    if (StepVal == 0) return 0;
-  } else {
-    // If not specified, use 1.0.
-    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
-  }
-  
-  // Compute the end condition.
-  Value *EndCond = End-&gt;Codegen();
-  if (EndCond == 0) return EndCond;
-  
-  // Reload, increment, and restore the alloca.  This handles the case where
-  // the body of the loop mutates the variable.
-  Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
-  Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
-  Builder.CreateStore(NextVar, Alloca);
-  
-  // Convert condition to a bool by comparing equal to 0.0.
-  EndCond = Builder.CreateFCmpONE(EndCond, 
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                  "loopcond");
-  
-  // Create the "after loop" block and insert it.
-  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
-  
-  // Insert the conditional branch into the end of LoopEndBB.
-  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
-  
-  // Any new code will be inserted in AfterBB.
-  Builder.SetInsertPoint(AfterBB);
-  
-  // Restore the unshadowed variable.
-  if (OldVal)
-    NamedValues[VarName] = OldVal;
-  else
-    NamedValues.erase(VarName);
-
-  
-  // for expr always returns 0.0.
-  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
-}
-
-Value *VarExprAST::Codegen() {
-  std::vector&lt;AllocaInst *&gt; OldBindings;
-  
-  Function *TheFunction = Builder.GetInsertBlock()-&gt;getParent();
-
-  // Register all variables and emit their initializer.
-  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
-    const std::string &amp;VarName = VarNames[i].first;
-    ExprAST *Init = VarNames[i].second;
-    
-    // Emit the initializer before adding the variable to scope, this prevents
-    // the initializer from referencing the variable itself, and permits stuff
-    // like this:
-    //  var a = 1 in
-    //    var a = a in ...   # refers to outer 'a'.
-    Value *InitVal;
-    if (Init) {
-      InitVal = Init-&gt;Codegen();
-      if (InitVal == 0) return 0;
-    } else { // If not specified, use 0.0.
-      InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
-    }
-    
-    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
-    Builder.CreateStore(InitVal, Alloca);
-
-    // Remember the old variable binding so that we can restore the binding when
-    // we unrecurse.
-    OldBindings.push_back(NamedValues[VarName]);
-    
-    // Remember this binding.
-    NamedValues[VarName] = Alloca;
-  }
-  
-  // Codegen the body, now that all vars are in scope.
-  Value *BodyVal = Body-&gt;Codegen();
-  if (BodyVal == 0) return 0;
-  
-  // Pop all our variables from scope.
-  for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
-    NamedValues[VarNames[i].first] = OldBindings[i];
-
-  // Return the body computation.
-  return BodyVal;
-}
-
-Function *PrototypeAST::Codegen() {
-  // Make the function type:  double(double,double) etc.
-  std::vector&lt;Type*&gt; Doubles(Args.size(),
-                             Type::getDoubleTy(getGlobalContext()));
-  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
-                                       Doubles, false);
-  
-  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-  
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F-&gt;getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F-&gt;eraseFromParent();
-    F = TheModule-&gt;getFunction(Name);
-    
-    // If F already has a body, reject this.
-    if (!F-&gt;empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-    
-    // If F took a different number of args, reject.
-    if (F-&gt;arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
-  
-  // Set names for all arguments.
-  unsigned Idx = 0;
-  for (Function::arg_iterator AI = F-&gt;arg_begin(); Idx != Args.size();
-       ++AI, ++Idx)
-    AI-&gt;setName(Args[Idx]);
-    
-  return F;
-}
-
-/// CreateArgumentAllocas - Create an alloca for each argument and register the
-/// argument in the symbol table so that references to it will succeed.
-void PrototypeAST::CreateArgumentAllocas(Function *F) {
-  Function::arg_iterator AI = F-&gt;arg_begin();
-  for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
-    // Create an alloca for this variable.
-    AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
-
-    // Store the initial value into the alloca.
-    Builder.CreateStore(AI, Alloca);
-
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = Alloca;
-  }
-}
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-  
-  Function *TheFunction = Proto-&gt;Codegen();
-  if (TheFunction == 0)
-    return 0;
-  
-  // If this is an operator, install it.
-  if (Proto-&gt;isBinaryOp())
-    BinopPrecedence[Proto-&gt;getOperatorName()] = Proto-&gt;getBinaryPrecedence();
-  
-  // Create a new basic block to start insertion into.
-  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
-  Builder.SetInsertPoint(BB);
-  
-  // Add all arguments to the symbol table and create their allocas.
-  Proto-&gt;CreateArgumentAllocas(TheFunction);
-
-  if (Value *RetVal = Body-&gt;Codegen()) {
-    // Finish off the function.
-    Builder.CreateRet(RetVal);
-
-    // Validate the generated code, checking for consistency.
-    verifyFunction(*TheFunction);
-
-    // Optimize the function.
-    TheFPM-&gt;run(*TheFunction);
-    
-    return TheFunction;
-  }
-  
-  // Error reading body, remove function.
-  TheFunction-&gt;eraseFromParent();
-
-  if (Proto-&gt;isBinaryOp())
-    BinopPrecedence.erase(Proto-&gt;getOperatorName());
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Top-Level parsing and JIT Driver
-//===----------------------------------------------------------------------===//
-
-static ExecutionEngine *TheExecutionEngine;
-
-static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F-&gt;Codegen()) {
-      fprintf(stderr, "Read function definition:");
-      LF-&gt;dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P-&gt;Codegen()) {
-      fprintf(stderr, "Read extern: ");
-      F-&gt;dump();
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-static void HandleTopLevelExpression() {
-  // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F-&gt;Codegen()) {
-      // JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine-&gt;getPointerToFunction(LF);
-      
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
-      fprintf(stderr, "Evaluated to %f\n", FP());
-    }
-  } else {
-    // Skip token for error recovery.
-    getNextToken();
-  }
-}
-
-/// top ::= definition | external | expression | ';'
-static void MainLoop() {
-  while (1) {
-    fprintf(stderr, "ready&gt; ");
-    switch (CurTok) {
-    case tok_eof:    return;
-    case ';':        getNextToken(); break;  // ignore top-level semicolons.
-    case tok_def:    HandleDefinition(); break;
-    case tok_extern: HandleExtern(); break;
-    default:         HandleTopLevelExpression(); break;
-    }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// "Library" functions that can be "extern'd" from user code.
-//===----------------------------------------------------------------------===//
-
-/// putchard - putchar that takes a double and returns 0.
-extern "C" 
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-/// printd - printf that takes a double prints it as "%f\n", returning 0.
-extern "C" 
-double printd(double X) {
-  printf("%f\n", X);
-  return 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Main driver code.
-//===----------------------------------------------------------------------===//
-
-int main() {
-  InitializeNativeTarget();
-  LLVMContext &amp;Context = getGlobalContext();
-
-  // Install standard binary operators.
-  // 1 is lowest precedence.
-  BinopPrecedence['='] = 2;
-  BinopPrecedence['&lt;'] = 10;
-  BinopPrecedence['+'] = 20;
-  BinopPrecedence['-'] = 20;
-  BinopPrecedence['*'] = 40;  // highest.
-
-  // Prime the first token.
-  fprintf(stderr, "ready&gt; ");
-  getNextToken();
-
-  // Make the module, which holds all the code.
-  TheModule = new Module("my cool jit", Context);
-
-  // Create the JIT.  This takes ownership of the module.
-  std::string ErrStr;
-  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&amp;ErrStr).create();
-  if (!TheExecutionEngine) {
-    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
-    exit(1);
-  }
-
-  FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  OurFPM.add(new DataLayout(*TheExecutionEngine-&gt;getDataLayout()));
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Promote allocas to registers.
-  OurFPM.add(createPromoteMemoryToRegisterPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &amp;OurFPM;
-
-  // Run the main "interpreter loop" now.
-  MainLoop();
-
-  TheFPM = 0;
-
-  // Print out all of the generated code.
-  TheModule-&gt;dump();
-
-  return 0;
-}
-</pre>
-</div>
-
-<a href="LangImpl8.html">Next: Conclusion and other useful LLVM tidbits</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl7.rst b/docs/tutorial/LangImpl7.rst
new file mode 100644
index 0000000000..602dcb5f6f
--- /dev/null
+++ b/docs/tutorial/LangImpl7.rst
@@ -0,0 +1,2005 @@
+=======================================================
+Kaleidoscope: Extending the Language: Mutable Variables
+=======================================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_
+
+Chapter 7 Introduction
+======================
+
+Welcome to Chapter 7 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. In chapters 1 through 6, we've built a
+very respectable, albeit simple, `functional programming
+language <http://en.wikipedia.org/wiki/Functional_programming>`_. In our
+journey, we learned some parsing techniques, how to build and represent
+an AST, how to build LLVM IR, and how to optimize the resultant code as
+well as JIT compile it.
+
+While Kaleidoscope is interesting as a functional language, the fact
+that it is functional makes it "too easy" to generate LLVM IR for it. In
+particular, a functional language makes it very easy to build LLVM IR
+directly in `SSA
+form <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_.
+Since LLVM requires that the input code be in SSA form, this is a very
+nice property and it is often unclear to newcomers how to generate code
+for an imperative language with mutable variables.
+
+The short (and happy) summary of this chapter is that there is no need
+for your front-end to build SSA form: LLVM provides highly tuned and
+well tested support for this, though the way it works is a bit
+unexpected for some.
+
+Why is this a hard problem?
+===========================
+
+To understand why mutable variables cause complexities in SSA
+construction, consider this extremely simple C example:
+
+.. code-block:: c
+
+    int G, H;
+    int test(_Bool Condition) {
+      int X;
+      if (Condition)
+        X = G;
+      else
+        X = H;
+      return X;
+    }
+
+In this case, we have the variable "X", whose value depends on the path
+executed in the program. Because there are two different possible values
+for X before the return instruction, a PHI node is inserted to merge the
+two values. The LLVM IR that we want for this example looks like this:
+
+.. code-block:: llvm
+
+    @G = weak global i32 0   ; type of @G is i32*
+    @H = weak global i32 0   ; type of @H is i32*
+
+    define i32 @test(i1 %Condition) {
+    entry:
+      br i1 %Condition, label %cond_true, label %cond_false
+
+    cond_true:
+      %X.0 = load i32* @G
+      br label %cond_next
+
+    cond_false:
+      %X.1 = load i32* @H
+      br label %cond_next
+
+    cond_next:
+      %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+      ret i32 %X.2
+    }
+
+In this example, the loads from the G and H global variables are
+explicit in the LLVM IR, and they live in the then/else branches of the
+if statement (cond\_true/cond\_false). In order to merge the incoming
+values, the X.2 phi node in the cond\_next block selects the right value
+to use based on where control flow is coming from: if control flow comes
+from the cond\_false block, X.2 gets the value of X.1. Alternatively, if
+control flow comes from cond\_true, it gets the value of X.0. The intent
+of this chapter is not to explain the details of SSA form. For more
+information, see one of the many `online
+references <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_.
+
+The question for this article is "who places the phi nodes when lowering
+assignments to mutable variables?". The issue here is that LLVM
+*requires* that its IR be in SSA form: there is no "non-ssa" mode for
+it. However, SSA construction requires non-trivial algorithms and data
+structures, so it is inconvenient and wasteful for every front-end to
+have to reproduce this logic.
+
+Memory in LLVM
+==============
+
+The 'trick' here is that while LLVM does require all register values to
+be in SSA form, it does not require (or permit) memory objects to be in
+SSA form. In the example above, note that the loads from G and H are
+direct accesses to G and H: they are not renamed or versioned. This
+differs from some other compiler systems, which do try to version memory
+objects. In LLVM, instead of encoding dataflow analysis of memory into
+the LLVM IR, it is handled with `Analysis
+Passes <../WritingAnLLVMPass.html>`_ which are computed on demand.
+
+With this in mind, the high-level idea is that we want to make a stack
+variable (which lives in memory, because it is on the stack) for each
+mutable object in a function. To take advantage of this trick, we need
+to talk about how LLVM represents stack variables.
+
+In LLVM, all memory accesses are explicit with load/store instructions,
+and it is carefully designed not to have (or need) an "address-of"
+operator. Notice how the type of the @G/@H global variables is actually
+"i32\*" even though the variable is defined as "i32". What this means is
+that @G defines *space* for an i32 in the global data area, but its
+*name* actually refers to the address for that space. Stack variables
+work the same way, except that instead of being declared with global
+variable definitions, they are declared with the `LLVM alloca
+instruction <../LangRef.html#i_alloca>`_:
+
+.. code-block:: llvm
+
+    define i32 @example() {
+    entry:
+      %X = alloca i32           ; type of %X is i32*.
+      ...
+      %tmp = load i32* %X       ; load the stack value %X from the stack.
+      %tmp2 = add i32 %tmp, 1   ; increment it
+      store i32 %tmp2, i32* %X  ; store it back
+      ...
+
+This code shows an example of how you can declare and manipulate a stack
+variable in the LLVM IR. Stack memory allocated with the alloca
+instruction is fully general: you can pass the address of the stack slot
+to functions, you can store it in other variables, etc. In our example
+above, we could rewrite the example to use the alloca technique to avoid
+using a PHI node:
+
+.. code-block:: llvm
+
+    @G = weak global i32 0   ; type of @G is i32*
+    @H = weak global i32 0   ; type of @H is i32*
+
+    define i32 @test(i1 %Condition) {
+    entry:
+      %X = alloca i32           ; type of %X is i32*.
+      br i1 %Condition, label %cond_true, label %cond_false
+
+    cond_true:
+      %X.0 = load i32* @G
+      store i32 %X.0, i32* %X   ; Update X
+      br label %cond_next
+
+    cond_false:
+      %X.1 = load i32* @H
+      store i32 %X.1, i32* %X   ; Update X
+      br label %cond_next
+
+    cond_next:
+      %X.2 = load i32* %X       ; Read X
+      ret i32 %X.2
+    }
+
+With this, we have discovered a way to handle arbitrary mutable
+variables without the need to create Phi nodes at all:
+
+#. Each mutable variable becomes a stack allocation.
+#. Each read of the variable becomes a load from the stack.
+#. Each update of the variable becomes a store to the stack.
+#. Taking the address of a variable just uses the stack address
+   directly.
+
+While this solution has solved our immediate problem, it introduced
+another one: we have now apparently introduced a lot of stack traffic
+for very simple and common operations, a major performance problem.
+Fortunately for us, the LLVM optimizer has a highly-tuned optimization
+pass named "mem2reg" that handles this case, promoting allocas like this
+into SSA registers, inserting Phi nodes as appropriate. If you run this
+example through the pass, for example, you'll get:
+
+.. code-block:: bash
+
+    $ llvm-as < example.ll | opt -mem2reg | llvm-dis
+    @G = weak global i32 0
+    @H = weak global i32 0
+
+    define i32 @test(i1 %Condition) {
+    entry:
+      br i1 %Condition, label %cond_true, label %cond_false
+
+    cond_true:
+      %X.0 = load i32* @G
+      br label %cond_next
+
+    cond_false:
+      %X.1 = load i32* @H
+      br label %cond_next
+
+    cond_next:
+      %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+      ret i32 %X.01
+    }
+
+The mem2reg pass implements the standard "iterated dominance frontier"
+algorithm for constructing SSA form and has a number of optimizations
+that speed up (very common) degenerate cases. The mem2reg optimization
+pass is the answer to dealing with mutable variables, and we highly
+recommend that you depend on it. Note that mem2reg only works on
+variables in certain circumstances:
+
+#. mem2reg is alloca-driven: it looks for allocas and if it can handle
+   them, it promotes them. It does not apply to global variables or heap
+   allocations.
+#. mem2reg only looks for alloca instructions in the entry block of the
+   function. Being in the entry block guarantees that the alloca is only
+   executed once, which makes analysis simpler.
+#. mem2reg only promotes allocas whose uses are direct loads and stores.
+   If the address of the stack object is passed to a function, or if any
+   funny pointer arithmetic is involved, the alloca will not be
+   promoted.
+#. mem2reg only works on allocas of `first
+   class <../LangRef.html#t_classifications>`_ values (such as pointers,
+   scalars and vectors), and only if the array size of the allocation is
+   1 (or missing in the .ll file). mem2reg is not capable of promoting
+   structs or arrays to registers. Note that the "scalarrepl" pass is
+   more powerful and can promote structs, "unions", and arrays in many
+   cases.
+
+All of these properties are easy to satisfy for most imperative
+languages, and we'll illustrate it below with Kaleidoscope. The final
+question you may be asking is: should I bother with this nonsense for my
+front-end? Wouldn't it be better if I just did SSA construction
+directly, avoiding use of the mem2reg optimization pass? In short, we
+strongly recommend that you use this technique for building SSA form,
+unless there is an extremely good reason not to. Using this technique
+is:
+
+-  Proven and well tested: llvm-gcc and clang both use this technique
+   for local mutable variables. As such, the most common clients of LLVM
+   are using this to handle a bulk of their variables. You can be sure
+   that bugs are found fast and fixed early.
+-  Extremely Fast: mem2reg has a number of special cases that make it
+   fast in common cases as well as fully general. For example, it has
+   fast-paths for variables that are only used in a single block,
+   variables that only have one assignment point, good heuristics to
+   avoid insertion of unneeded phi nodes, etc.
+-  Needed for debug info generation: `Debug information in
+   LLVM <../SourceLevelDebugging.html>`_ relies on having the address of
+   the variable exposed so that debug info can be attached to it. This
+   technique dovetails very naturally with this style of debug info.
+
+If nothing else, this makes it much easier to get your front-end up and
+running, and is very simple to implement. Lets extend Kaleidoscope with
+mutable variables now!
+
+Mutable Variables in Kaleidoscope
+=================================
+
+Now that we know the sort of problem we want to tackle, lets see what
+this looks like in the context of our little Kaleidoscope language.
+We're going to add two features:
+
+#. The ability to mutate variables with the '=' operator.
+#. The ability to define new variables.
+
+While the first item is really what this is about, we only have
+variables for incoming arguments as well as for induction variables, and
+redefining those only goes so far :). Also, the ability to define new
+variables is a useful thing regardless of whether you will be mutating
+them. Here's a motivating example that shows how we could use these:
+
+::
+
+    # Define ':' for sequencing: as a low-precedence operator that ignores operands
+    # and just returns the RHS.
+    def binary : 1 (x y) y;
+
+    # Recursive fib, we could do this before.
+    def fib(x)
+      if (x < 3) then
+        1
+      else
+        fib(x-1)+fib(x-2);
+
+    # Iterative fib.
+    def fibi(x)
+      var a = 1, b = 1, c in
+      (for i = 3, i < x in
+         c = a + b :
+         a = b :
+         b = c) :
+      b;
+
+    # Call it.
+    fibi(10);
+
+In order to mutate variables, we have to change our existing variables
+to use the "alloca trick". Once we have that, we'll add our new
+operator, then extend Kaleidoscope to support new variable definitions.
+
+Adjusting Existing Variables for Mutation
+=========================================
+
+The symbol table in Kaleidoscope is managed at code generation time by
+the '``NamedValues``' map. This map currently keeps track of the LLVM
+"Value\*" that holds the double value for the named variable. In order
+to support mutation, we need to change this slightly, so that it
+``NamedValues`` holds the *memory location* of the variable in question.
+Note that this change is a refactoring: it changes the structure of the
+code, but does not (by itself) change the behavior of the compiler. All
+of these changes are isolated in the Kaleidoscope code generator.
+
+At this point in Kaleidoscope's development, it only supports variables
+for two things: incoming arguments to functions and the induction
+variable of 'for' loops. For consistency, we'll allow mutation of these
+variables in addition to other user-defined variables. This means that
+these will both need memory locations.
+
+To start our transformation of Kaleidoscope, we'll change the
+NamedValues map so that it maps to AllocaInst\* instead of Value\*. Once
+we do this, the C++ compiler will tell us what parts of the code we need
+to update:
+
+.. code-block:: c++
+
+    static std::map<std::string, AllocaInst*> NamedValues;
+
+Also, since we will need to create these alloca's, we'll use a helper
+function that ensures that the allocas are created in the entry block of
+the function:
+
+.. code-block:: c++
+
+    /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
+    /// the function.  This is used for mutable variables etc.
+    static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+                                              const std::string &VarName) {
+      IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
+                     TheFunction->getEntryBlock().begin());
+      return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+                               VarName.c_str());
+    }
+
+This funny looking code creates an IRBuilder object that is pointing at
+the first instruction (.begin()) of the entry block. It then creates an
+alloca with the expected name and returns it. Because all values in
+Kaleidoscope are doubles, there is no need to pass in a type to use.
+
+With this in place, the first functionality change we want to make is to
+variable references. In our new scheme, variables live on the stack, so
+code generating a reference to them actually needs to produce a load
+from the stack slot:
+
+.. code-block:: c++
+
+    Value *VariableExprAST::Codegen() {
+      // Look this variable up in the function.
+      Value *V = NamedValues[Name];
+      if (V == 0) return ErrorV("Unknown variable name");
+
+      // Load the value.
+      return Builder.CreateLoad(V, Name.c_str());
+    }
+
+As you can see, this is pretty straightforward. Now we need to update
+the things that define the variables to set up the alloca. We'll start
+with ``ForExprAST::Codegen`` (see the `full code listing <#code>`_ for
+the unabridged code):
+
+.. code-block:: c++
+
+      Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+      // Create an alloca for the variable in the entry block.
+      AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+
+        // Emit the start code first, without 'variable' in scope.
+      Value *StartVal = Start->Codegen();
+      if (StartVal == 0) return 0;
+
+      // Store the value into the alloca.
+      Builder.CreateStore(StartVal, Alloca);
+      ...
+
+      // Compute the end condition.
+      Value *EndCond = End->Codegen();
+      if (EndCond == 0) return EndCond;
+
+      // Reload, increment, and restore the alloca.  This handles the case where
+      // the body of the loop mutates the variable.
+      Value *CurVar = Builder.CreateLoad(Alloca);
+      Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
+      Builder.CreateStore(NextVar, Alloca);
+      ...
+
+This code is virtually identical to the code `before we allowed mutable
+variables <LangImpl5.html#forcodegen>`_. The big difference is that we
+no longer have to construct a PHI node, and we use load/store to access
+the variable as needed.
+
+To support mutable argument variables, we need to also make allocas for
+them. The code for this is also pretty simple:
+
+.. code-block:: c++
+
+    /// CreateArgumentAllocas - Create an alloca for each argument and register the
+    /// argument in the symbol table so that references to it will succeed.
+    void PrototypeAST::CreateArgumentAllocas(Function *F) {
+      Function::arg_iterator AI = F->arg_begin();
+      for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
+        // Create an alloca for this variable.
+        AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
+
+        // Store the initial value into the alloca.
+        Builder.CreateStore(AI, Alloca);
+
+        // Add arguments to variable symbol table.
+        NamedValues[Args[Idx]] = Alloca;
+      }
+    }
+
+For each argument, we make an alloca, store the input value to the
+function into the alloca, and register the alloca as the memory location
+for the argument. This method gets invoked by ``FunctionAST::Codegen``
+right after it sets up the entry block for the function.
+
+The final missing piece is adding the mem2reg pass, which allows us to
+get good codegen once again:
+
+.. code-block:: c++
+
+        // Set up the optimizer pipeline.  Start with registering info about how the
+        // target lays out data structures.
+        OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
+        // Promote allocas to registers.
+        OurFPM.add(createPromoteMemoryToRegisterPass());
+        // Do simple "peephole" optimizations and bit-twiddling optzns.
+        OurFPM.add(createInstructionCombiningPass());
+        // Reassociate expressions.
+        OurFPM.add(createReassociatePass());
+
+It is interesting to see what the code looks like before and after the
+mem2reg optimization runs. For example, this is the before/after code
+for our recursive fib function. Before the optimization:
+
+.. code-block:: llvm
+
+    define double @fib(double %x) {
+    entry:
+      %x1 = alloca double
+      store double %x, double* %x1
+      %x2 = load double* %x1
+      %cmptmp = fcmp ult double %x2, 3.000000e+00
+      %booltmp = uitofp i1 %cmptmp to double
+      %ifcond = fcmp one double %booltmp, 0.000000e+00
+      br i1 %ifcond, label %then, label %else
+
+    then:       ; preds = %entry
+      br label %ifcont
+
+    else:       ; preds = %entry
+      %x3 = load double* %x1
+      %subtmp = fsub double %x3, 1.000000e+00
+      %calltmp = call double @fib(double %subtmp)
+      %x4 = load double* %x1
+      %subtmp5 = fsub double %x4, 2.000000e+00
+      %calltmp6 = call double @fib(double %subtmp5)
+      %addtmp = fadd double %calltmp, %calltmp6
+      br label %ifcont
+
+    ifcont:     ; preds = %else, %then
+      %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+      ret double %iftmp
+    }
+
+Here there is only one variable (x, the input argument) but you can
+still see the extremely simple-minded code generation strategy we are
+using. In the entry block, an alloca is created, and the initial input
+value is stored into it. Each reference to the variable does a reload
+from the stack. Also, note that we didn't modify the if/then/else
+expression, so it still inserts a PHI node. While we could make an
+alloca for it, it is actually easier to create a PHI node for it, so we
+still just make the PHI.
+
+Here is the code after the mem2reg pass runs:
+
+.. code-block:: llvm
+
+    define double @fib(double %x) {
+    entry:
+      %cmptmp = fcmp ult double %x, 3.000000e+00
+      %booltmp = uitofp i1 %cmptmp to double
+      %ifcond = fcmp one double %booltmp, 0.000000e+00
+      br i1 %ifcond, label %then, label %else
+
+    then:
+      br label %ifcont
+
+    else:
+      %subtmp = fsub double %x, 1.000000e+00
+      %calltmp = call double @fib(double %subtmp)
+      %subtmp5 = fsub double %x, 2.000000e+00
+      %calltmp6 = call double @fib(double %subtmp5)
+      %addtmp = fadd double %calltmp, %calltmp6
+      br label %ifcont
+
+    ifcont:     ; preds = %else, %then
+      %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+      ret double %iftmp
+    }
+
+This is a trivial case for mem2reg, since there are no redefinitions of
+the variable. The point of showing this is to calm your tension about
+inserting such blatent inefficiencies :).
+
+After the rest of the optimizers run, we get:
+
+.. code-block:: llvm
+
+    define double @fib(double %x) {
+    entry:
+      %cmptmp = fcmp ult double %x, 3.000000e+00
+      %booltmp = uitofp i1 %cmptmp to double
+      %ifcond = fcmp ueq double %booltmp, 0.000000e+00
+      br i1 %ifcond, label %else, label %ifcont
+
+    else:
+      %subtmp = fsub double %x, 1.000000e+00
+      %calltmp = call double @fib(double %subtmp)
+      %subtmp5 = fsub double %x, 2.000000e+00
+      %calltmp6 = call double @fib(double %subtmp5)
+      %addtmp = fadd double %calltmp, %calltmp6
+      ret double %addtmp
+
+    ifcont:
+      ret double 1.000000e+00
+    }
+
+Here we see that the simplifycfg pass decided to clone the return
+instruction into the end of the 'else' block. This allowed it to
+eliminate some branches and the PHI node.
+
+Now that all symbol table references are updated to use stack variables,
+we'll add the assignment operator.
+
+New Assignment Operator
+=======================
+
+With our current framework, adding a new assignment operator is really
+simple. We will parse it just like any other binary operator, but handle
+it internally (instead of allowing the user to define it). The first
+step is to set a precedence:
+
+.. code-block:: c++
+
+     int main() {
+       // Install standard binary operators.
+       // 1 is lowest precedence.
+       BinopPrecedence['='] = 2;
+       BinopPrecedence['<'] = 10;
+       BinopPrecedence['+'] = 20;
+       BinopPrecedence['-'] = 20;
+
+Now that the parser knows the precedence of the binary operator, it
+takes care of all the parsing and AST generation. We just need to
+implement codegen for the assignment operator. This looks like:
+
+.. code-block:: c++
+
+    Value *BinaryExprAST::Codegen() {
+      // Special case '=' because we don't want to emit the LHS as an expression.
+      if (Op == '=') {
+        // Assignment requires the LHS to be an identifier.
+        VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS);
+        if (!LHSE)
+          return ErrorV("destination of '=' must be a variable");
+
+Unlike the rest of the binary operators, our assignment operator doesn't
+follow the "emit LHS, emit RHS, do computation" model. As such, it is
+handled as a special case before the other binary operators are handled.
+The other strange thing is that it requires the LHS to be a variable. It
+is invalid to have "(x+1) = expr" - only things like "x = expr" are
+allowed.
+
+.. code-block:: c++
+
+        // Codegen the RHS.
+        Value *Val = RHS->Codegen();
+        if (Val == 0) return 0;
+
+        // Look up the name.
+        Value *Variable = NamedValues[LHSE->getName()];
+        if (Variable == 0) return ErrorV("Unknown variable name");
+
+        Builder.CreateStore(Val, Variable);
+        return Val;
+      }
+      ...
+
+Once we have the variable, codegen'ing the assignment is
+straightforward: we emit the RHS of the assignment, create a store, and
+return the computed value. Returning a value allows for chained
+assignments like "X = (Y = Z)".
+
+Now that we have an assignment operator, we can mutate loop variables
+and arguments. For example, we can now run code like this:
+
+::
+
+    # Function to print a double.
+    extern printd(x);
+
+    # Define ':' for sequencing: as a low-precedence operator that ignores operands
+    # and just returns the RHS.
+    def binary : 1 (x y) y;
+
+    def test(x)
+      printd(x) :
+      x = 4 :
+      printd(x);
+
+    test(123);
+
+When run, this example prints "123" and then "4", showing that we did
+actually mutate the value! Okay, we have now officially implemented our
+goal: getting this to work requires SSA construction in the general
+case. However, to be really useful, we want the ability to define our
+own local variables, lets add this next!
+
+User-defined Local Variables
+============================
+
+Adding var/in is just like any other other extensions we made to
+Kaleidoscope: we extend the lexer, the parser, the AST and the code
+generator. The first step for adding our new 'var/in' construct is to
+extend the lexer. As before, this is pretty trivial, the code looks like
+this:
+
+.. code-block:: c++
+
+    enum Token {
+      ...
+      // var definition
+      tok_var = -13
+    ...
+    }
+    ...
+    static int gettok() {
+    ...
+        if (IdentifierStr == "in") return tok_in;
+        if (IdentifierStr == "binary") return tok_binary;
+        if (IdentifierStr == "unary") return tok_unary;
+        if (IdentifierStr == "var") return tok_var;
+        return tok_identifier;
+    ...
+
+The next step is to define the AST node that we will construct. For
+var/in, it looks like this:
+
+.. code-block:: c++
+
+    /// VarExprAST - Expression class for var/in
+    class VarExprAST : public ExprAST {
+      std::vector<std::pair<std::string, ExprAST*> > VarNames;
+      ExprAST *Body;
+    public:
+      VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
+                 ExprAST *body)
+      : VarNames(varnames), Body(body) {}
+
+      virtual Value *Codegen();
+    };
+
+var/in allows a list of names to be defined all at once, and each name
+can optionally have an initializer value. As such, we capture this
+information in the VarNames vector. Also, var/in has a body, this body
+is allowed to access the variables defined by the var/in.
+
+With this in place, we can define the parser pieces. The first thing we
+do is add it as a primary expression:
+
+.. code-block:: c++
+
+    /// primary
+    ///   ::= identifierexpr
+    ///   ::= numberexpr
+    ///   ::= parenexpr
+    ///   ::= ifexpr
+    ///   ::= forexpr
+    ///   ::= varexpr
+    static ExprAST *ParsePrimary() {
+      switch (CurTok) {
+      default: return Error("unknown token when expecting an expression");
+      case tok_identifier: return ParseIdentifierExpr();
+      case tok_number:     return ParseNumberExpr();
+      case '(':            return ParseParenExpr();
+      case tok_if:         return ParseIfExpr();
+      case tok_for:        return ParseForExpr();
+      case tok_var:        return ParseVarExpr();
+      }
+    }
+
+Next we define ParseVarExpr:
+
+.. code-block:: c++
+
+    /// varexpr ::= 'var' identifier ('=' expression)?
+    //                    (',' identifier ('=' expression)?)* 'in' expression
+    static ExprAST *ParseVarExpr() {
+      getNextToken();  // eat the var.
+
+      std::vector<std::pair<std::string, ExprAST*> > VarNames;
+
+      // At least one variable name is required.
+      if (CurTok != tok_identifier)
+        return Error("expected identifier after var");
+
+The first part of this code parses the list of identifier/expr pairs
+into the local ``VarNames`` vector.
+
+.. code-block:: c++
+
+      while (1) {
+        std::string Name = IdentifierStr;
+        getNextToken();  // eat identifier.
+
+        // Read the optional initializer.
+        ExprAST *Init = 0;
+        if (CurTok == '=') {
+          getNextToken(); // eat the '='.
+
+          Init = ParseExpression();
+          if (Init == 0) return 0;
+        }
+
+        VarNames.push_back(std::make_pair(Name, Init));
+
+        // End of var list, exit loop.
+        if (CurTok != ',') break;
+        getNextToken(); // eat the ','.
+
+        if (CurTok != tok_identifier)
+          return Error("expected identifier list after var");
+      }
+
+Once all the variables are parsed, we then parse the body and create the
+AST node:
+
+.. code-block:: c++
+
+      // At this point, we have to have 'in'.
+      if (CurTok != tok_in)
+        return Error("expected 'in' keyword after 'var'");
+      getNextToken();  // eat 'in'.
+
+      ExprAST *Body = ParseExpression();
+      if (Body == 0) return 0;
+
+      return new VarExprAST(VarNames, Body);
+    }
+
+Now that we can parse and represent the code, we need to support
+emission of LLVM IR for it. This code starts out with:
+
+.. code-block:: c++
+
+    Value *VarExprAST::Codegen() {
+      std::vector<AllocaInst *> OldBindings;
+
+      Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+      // Register all variables and emit their initializer.
+      for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
+        const std::string &VarName = VarNames[i].first;
+        ExprAST *Init = VarNames[i].second;
+
+Basically it loops over all the variables, installing them one at a
+time. For each variable we put into the symbol table, we remember the
+previous value that we replace in OldBindings.
+
+.. code-block:: c++
+
+        // Emit the initializer before adding the variable to scope, this prevents
+        // the initializer from referencing the variable itself, and permits stuff
+        // like this:
+        //  var a = 1 in
+        //    var a = a in ...   # refers to outer 'a'.
+        Value *InitVal;
+        if (Init) {
+          InitVal = Init->Codegen();
+          if (InitVal == 0) return 0;
+        } else { // If not specified, use 0.0.
+          InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
+        }
+
+        AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+        Builder.CreateStore(InitVal, Alloca);
+
+        // Remember the old variable binding so that we can restore the binding when
+        // we unrecurse.
+        OldBindings.push_back(NamedValues[VarName]);
+
+        // Remember this binding.
+        NamedValues[VarName] = Alloca;
+      }
+
+There are more comments here than code. The basic idea is that we emit
+the initializer, create the alloca, then update the symbol table to
+point to it. Once all the variables are installed in the symbol table,
+we evaluate the body of the var/in expression:
+
+.. code-block:: c++
+
+      // Codegen the body, now that all vars are in scope.
+      Value *BodyVal = Body->Codegen();
+      if (BodyVal == 0) return 0;
+
+Finally, before returning, we restore the previous variable bindings:
+
+.. code-block:: c++
+
+      // Pop all our variables from scope.
+      for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
+        NamedValues[VarNames[i].first] = OldBindings[i];
+
+      // Return the body computation.
+      return BodyVal;
+    }
+
+The end result of all of this is that we get properly scoped variable
+definitions, and we even (trivially) allow mutation of them :).
+
+With this, we completed what we set out to do. Our nice iterative fib
+example from the intro compiles and runs just fine. The mem2reg pass
+optimizes all of our stack variables into SSA registers, inserting PHI
+nodes where needed, and our front-end remains simple: no "iterated
+dominance frontier" computation anywhere in sight.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+mutable variables and var/in support. To build this example, use:
+
+.. code-block:: bash
+
+    # Compile
+    clang++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+    # Run
+    ./toy
+
+Here is the code:
+
+.. code-block:: c++
+
+    #include "llvm/DerivedTypes.h"
+    #include "llvm/ExecutionEngine/ExecutionEngine.h"
+    #include "llvm/ExecutionEngine/JIT.h"
+    #include "llvm/IRBuilder.h"
+    #include "llvm/LLVMContext.h"
+    #include "llvm/Module.h"
+    #include "llvm/PassManager.h"
+    #include "llvm/Analysis/Verifier.h"
+    #include "llvm/Analysis/Passes.h"
+    #include "llvm/DataLayout.h"
+    #include "llvm/Transforms/Scalar.h"
+    #include "llvm/Support/TargetSelect.h"
+    #include <cstdio>
+    #include <string>
+    #include <map>
+    #include <vector>
+    using namespace llvm;
+
+    //===----------------------------------------------------------------------===//
+    // Lexer
+    //===----------------------------------------------------------------------===//
+
+    // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+    // of these for known things.
+    enum Token {
+      tok_eof = -1,
+
+      // commands
+      tok_def = -2, tok_extern = -3,
+
+      // primary
+      tok_identifier = -4, tok_number = -5,
+
+      // control
+      tok_if = -6, tok_then = -7, tok_else = -8,
+      tok_for = -9, tok_in = -10,
+
+      // operators
+      tok_binary = -11, tok_unary = -12,
+
+      // var definition
+      tok_var = -13
+    };
+
+    static std::string IdentifierStr;  // Filled in if tok_identifier
+    static double NumVal;              // Filled in if tok_number
+
+    /// gettok - Return the next token from standard input.
+    static int gettok() {
+      static int LastChar = ' ';
+
+      // Skip any whitespace.
+      while (isspace(LastChar))
+        LastChar = getchar();
+
+      if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+        IdentifierStr = LastChar;
+        while (isalnum((LastChar = getchar())))
+          IdentifierStr += LastChar;
+
+        if (IdentifierStr == "def") return tok_def;
+        if (IdentifierStr == "extern") return tok_extern;
+        if (IdentifierStr == "if") return tok_if;
+        if (IdentifierStr == "then") return tok_then;
+        if (IdentifierStr == "else") return tok_else;
+        if (IdentifierStr == "for") return tok_for;
+        if (IdentifierStr == "in") return tok_in;
+        if (IdentifierStr == "binary") return tok_binary;
+        if (IdentifierStr == "unary") return tok_unary;
+        if (IdentifierStr == "var") return tok_var;
+        return tok_identifier;
+      }
+
+      if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+        std::string NumStr;
+        do {
+          NumStr += LastChar;
+          LastChar = getchar();
+        } while (isdigit(LastChar) || LastChar == '.');
+
+        NumVal = strtod(NumStr.c_str(), 0);
+        return tok_number;
+      }
+
+      if (LastChar == '#') {
+        // Comment until end of line.
+        do LastChar = getchar();
+        while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+        if (LastChar != EOF)
+          return gettok();
+      }
+
+      // Check for end of file.  Don't eat the EOF.
+      if (LastChar == EOF)
+        return tok_eof;
+
+      // Otherwise, just return the character as its ascii value.
+      int ThisChar = LastChar;
+      LastChar = getchar();
+      return ThisChar;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Abstract Syntax Tree (aka Parse Tree)
+    //===----------------------------------------------------------------------===//
+
+    /// ExprAST - Base class for all expression nodes.
+    class ExprAST {
+    public:
+      virtual ~ExprAST() {}
+      virtual Value *Codegen() = 0;
+    };
+
+    /// NumberExprAST - Expression class for numeric literals like "1.0".
+    class NumberExprAST : public ExprAST {
+      double Val;
+    public:
+      NumberExprAST(double val) : Val(val) {}
+      virtual Value *Codegen();
+    };
+
+    /// VariableExprAST - Expression class for referencing a variable, like "a".
+    class VariableExprAST : public ExprAST {
+      std::string Name;
+    public:
+      VariableExprAST(const std::string &name) : Name(name) {}
+      const std::string &getName() const { return Name; }
+      virtual Value *Codegen();
+    };
+
+    /// UnaryExprAST - Expression class for a unary operator.
+    class UnaryExprAST : public ExprAST {
+      char Opcode;
+      ExprAST *Operand;
+    public:
+      UnaryExprAST(char opcode, ExprAST *operand)
+        : Opcode(opcode), Operand(operand) {}
+      virtual Value *Codegen();
+    };
+
+    /// BinaryExprAST - Expression class for a binary operator.
+    class BinaryExprAST : public ExprAST {
+      char Op;
+      ExprAST *LHS, *RHS;
+    public:
+      BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+        : Op(op), LHS(lhs), RHS(rhs) {}
+      virtual Value *Codegen();
+    };
+
+    /// CallExprAST - Expression class for function calls.
+    class CallExprAST : public ExprAST {
+      std::string Callee;
+      std::vector<ExprAST*> Args;
+    public:
+      CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+        : Callee(callee), Args(args) {}
+      virtual Value *Codegen();
+    };
+
+    /// IfExprAST - Expression class for if/then/else.
+    class IfExprAST : public ExprAST {
+      ExprAST *Cond, *Then, *Else;
+    public:
+      IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+      : Cond(cond), Then(then), Else(_else) {}
+      virtual Value *Codegen();
+    };
+
+    /// ForExprAST - Expression class for for/in.
+    class ForExprAST : public ExprAST {
+      std::string VarName;
+      ExprAST *Start, *End, *Step, *Body;
+    public:
+      ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+                 ExprAST *step, ExprAST *body)
+        : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+      virtual Value *Codegen();
+    };
+
+    /// VarExprAST - Expression class for var/in
+    class VarExprAST : public ExprAST {
+      std::vector<std::pair<std::string, ExprAST*> > VarNames;
+      ExprAST *Body;
+    public:
+      VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
+                 ExprAST *body)
+      : VarNames(varnames), Body(body) {}
+
+      virtual Value *Codegen();
+    };
+
+    /// PrototypeAST - This class represents the "prototype" for a function,
+    /// which captures its name, and its argument names (thus implicitly the number
+    /// of arguments the function takes), as well as if it is an operator.
+    class PrototypeAST {
+      std::string Name;
+      std::vector<std::string> Args;
+      bool isOperator;
+      unsigned Precedence;  // Precedence if a binary op.
+    public:
+      PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+                   bool isoperator = false, unsigned prec = 0)
+      : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+      bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+      bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+      char getOperatorName() const {
+        assert(isUnaryOp() || isBinaryOp());
+        return Name[Name.size()-1];
+      }
+
+      unsigned getBinaryPrecedence() const { return Precedence; }
+
+      Function *Codegen();
+
+      void CreateArgumentAllocas(Function *F);
+    };
+
+    /// FunctionAST - This class represents a function definition itself.
+    class FunctionAST {
+      PrototypeAST *Proto;
+      ExprAST *Body;
+    public:
+      FunctionAST(PrototypeAST *proto, ExprAST *body)
+        : Proto(proto), Body(body) {}
+
+      Function *Codegen();
+    };
+
+    //===----------------------------------------------------------------------===//
+    // Parser
+    //===----------------------------------------------------------------------===//
+
+    /// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+    /// token the parser is looking at.  getNextToken reads another token from the
+    /// lexer and updates CurTok with its results.
+    static int CurTok;
+    static int getNextToken() {
+      return CurTok = gettok();
+    }
+
+    /// BinopPrecedence - This holds the precedence for each binary operator that is
+    /// defined.
+    static std::map<char, int> BinopPrecedence;
+
+    /// GetTokPrecedence - Get the precedence of the pending binary operator token.
+    static int GetTokPrecedence() {
+      if (!isascii(CurTok))
+        return -1;
+
+      // Make sure it's a declared binop.
+      int TokPrec = BinopPrecedence[CurTok];
+      if (TokPrec <= 0) return -1;
+      return TokPrec;
+    }
+
+    /// Error* - These are little helper functions for error handling.
+    ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+    PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+    FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+    static ExprAST *ParseExpression();
+
+    /// identifierexpr
+    ///   ::= identifier
+    ///   ::= identifier '(' expression* ')'
+    static ExprAST *ParseIdentifierExpr() {
+      std::string IdName = IdentifierStr;
+
+      getNextToken();  // eat identifier.
+
+      if (CurTok != '(') // Simple variable ref.
+        return new VariableExprAST(IdName);
+
+      // Call.
+      getNextToken();  // eat (
+      std::vector<ExprAST*> Args;
+      if (CurTok != ')') {
+        while (1) {
+          ExprAST *Arg = ParseExpression();
+          if (!Arg) return 0;
+          Args.push_back(Arg);
+
+          if (CurTok == ')') break;
+
+          if (CurTok != ',')
+            return Error("Expected ')' or ',' in argument list");
+          getNextToken();
+        }
+      }
+
+      // Eat the ')'.
+      getNextToken();
+
+      return new CallExprAST(IdName, Args);
+    }
+
+    /// numberexpr ::= number
+    static ExprAST *ParseNumberExpr() {
+      ExprAST *Result = new NumberExprAST(NumVal);
+      getNextToken(); // consume the number
+      return Result;
+    }
+
+    /// parenexpr ::= '(' expression ')'
+    static ExprAST *ParseParenExpr() {
+      getNextToken();  // eat (.
+      ExprAST *V = ParseExpression();
+      if (!V) return 0;
+
+      if (CurTok != ')')
+        return Error("expected ')'");
+      getNextToken();  // eat ).
+      return V;
+    }
+
+    /// ifexpr ::= 'if' expression 'then' expression 'else' expression
+    static ExprAST *ParseIfExpr() {
+      getNextToken();  // eat the if.
+
+      // condition.
+      ExprAST *Cond = ParseExpression();
+      if (!Cond) return 0;
+
+      if (CurTok != tok_then)
+        return Error("expected then");
+      getNextToken();  // eat the then
+
+      ExprAST *Then = ParseExpression();
+      if (Then == 0) return 0;
+
+      if (CurTok != tok_else)
+        return Error("expected else");
+
+      getNextToken();
+
+      ExprAST *Else = ParseExpression();
+      if (!Else) return 0;
+
+      return new IfExprAST(Cond, Then, Else);
+    }
+
+    /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+    static ExprAST *ParseForExpr() {
+      getNextToken();  // eat the for.
+
+      if (CurTok != tok_identifier)
+        return Error("expected identifier after for");
+
+      std::string IdName = IdentifierStr;
+      getNextToken();  // eat identifier.
+
+      if (CurTok != '=')
+        return Error("expected '=' after for");
+      getNextToken();  // eat '='.
+
+
+      ExprAST *Start = ParseExpression();
+      if (Start == 0) return 0;
+      if (CurTok != ',')
+        return Error("expected ',' after for start value");
+      getNextToken();
+
+      ExprAST *End = ParseExpression();
+      if (End == 0) return 0;
+
+      // The step value is optional.
+      ExprAST *Step = 0;
+      if (CurTok == ',') {
+        getNextToken();
+        Step = ParseExpression();
+        if (Step == 0) return 0;
+      }
+
+      if (CurTok != tok_in)
+        return Error("expected 'in' after for");
+      getNextToken();  // eat 'in'.
+
+      ExprAST *Body = ParseExpression();
+      if (Body == 0) return 0;
+
+      return new ForExprAST(IdName, Start, End, Step, Body);
+    }
+
+    /// varexpr ::= 'var' identifier ('=' expression)?
+    //                    (',' identifier ('=' expression)?)* 'in' expression
+    static ExprAST *ParseVarExpr() {
+      getNextToken();  // eat the var.
+
+      std::vector<std::pair<std::string, ExprAST*> > VarNames;
+
+      // At least one variable name is required.
+      if (CurTok != tok_identifier)
+        return Error("expected identifier after var");
+
+      while (1) {
+        std::string Name = IdentifierStr;
+        getNextToken();  // eat identifier.
+
+        // Read the optional initializer.
+        ExprAST *Init = 0;
+        if (CurTok == '=') {
+          getNextToken(); // eat the '='.
+
+          Init = ParseExpression();
+          if (Init == 0) return 0;
+        }
+
+        VarNames.push_back(std::make_pair(Name, Init));
+
+        // End of var list, exit loop.
+        if (CurTok != ',') break;
+        getNextToken(); // eat the ','.
+
+        if (CurTok != tok_identifier)
+          return Error("expected identifier list after var");
+      }
+
+      // At this point, we have to have 'in'.
+      if (CurTok != tok_in)
+        return Error("expected 'in' keyword after 'var'");
+      getNextToken();  // eat 'in'.
+
+      ExprAST *Body = ParseExpression();
+      if (Body == 0) return 0;
+
+      return new VarExprAST(VarNames, Body);
+    }
+
+    /// primary
+    ///   ::= identifierexpr
+    ///   ::= numberexpr
+    ///   ::= parenexpr
+    ///   ::= ifexpr
+    ///   ::= forexpr
+    ///   ::= varexpr
+    static ExprAST *ParsePrimary() {
+      switch (CurTok) {
+      default: return Error("unknown token when expecting an expression");
+      case tok_identifier: return ParseIdentifierExpr();
+      case tok_number:     return ParseNumberExpr();
+      case '(':            return ParseParenExpr();
+      case tok_if:         return ParseIfExpr();
+      case tok_for:        return ParseForExpr();
+      case tok_var:        return ParseVarExpr();
+      }
+    }
+
+    /// unary
+    ///   ::= primary
+    ///   ::= '!' unary
+    static ExprAST *ParseUnary() {
+      // If the current token is not an operator, it must be a primary expr.
+      if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+        return ParsePrimary();
+
+      // If this is a unary operator, read it.
+      int Opc = CurTok;
+      getNextToken();
+      if (ExprAST *Operand = ParseUnary())
+        return new UnaryExprAST(Opc, Operand);
+      return 0;
+    }
+
+    /// binoprhs
+    ///   ::= ('+' unary)*
+    static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+      // If this is a binop, find its precedence.
+      while (1) {
+        int TokPrec = GetTokPrecedence();
+
+        // If this is a binop that binds at least as tightly as the current binop,
+        // consume it, otherwise we are done.
+        if (TokPrec < ExprPrec)
+          return LHS;
+
+        // Okay, we know this is a binop.
+        int BinOp = CurTok;
+        getNextToken();  // eat binop
+
+        // Parse the unary expression after the binary operator.
+        ExprAST *RHS = ParseUnary();
+        if (!RHS) return 0;
+
+        // If BinOp binds less tightly with RHS than the operator after RHS, let
+        // the pending operator take RHS as its LHS.
+        int NextPrec = GetTokPrecedence();
+        if (TokPrec < NextPrec) {
+          RHS = ParseBinOpRHS(TokPrec+1, RHS);
+          if (RHS == 0) return 0;
+        }
+
+        // Merge LHS/RHS.
+        LHS = new BinaryExprAST(BinOp, LHS, RHS);
+      }
+    }
+
+    /// expression
+    ///   ::= unary binoprhs
+    ///
+    static ExprAST *ParseExpression() {
+      ExprAST *LHS = ParseUnary();
+      if (!LHS) return 0;
+
+      return ParseBinOpRHS(0, LHS);
+    }
+
+    /// prototype
+    ///   ::= id '(' id* ')'
+    ///   ::= binary LETTER number? (id, id)
+    ///   ::= unary LETTER (id)
+    static PrototypeAST *ParsePrototype() {
+      std::string FnName;
+
+      unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+      unsigned BinaryPrecedence = 30;
+
+      switch (CurTok) {
+      default:
+        return ErrorP("Expected function name in prototype");
+      case tok_identifier:
+        FnName = IdentifierStr;
+        Kind = 0;
+        getNextToken();
+        break;
+      case tok_unary:
+        getNextToken();
+        if (!isascii(CurTok))
+          return ErrorP("Expected unary operator");
+        FnName = "unary";
+        FnName += (char)CurTok;
+        Kind = 1;
+        getNextToken();
+        break;
+      case tok_binary:
+        getNextToken();
+        if (!isascii(CurTok))
+          return ErrorP("Expected binary operator");
+        FnName = "binary";
+        FnName += (char)CurTok;
+        Kind = 2;
+        getNextToken();
+
+        // Read the precedence if present.
+        if (CurTok == tok_number) {
+          if (NumVal < 1 || NumVal > 100)
+            return ErrorP("Invalid precedecnce: must be 1..100");
+          BinaryPrecedence = (unsigned)NumVal;
+          getNextToken();
+        }
+        break;
+      }
+
+      if (CurTok != '(')
+        return ErrorP("Expected '(' in prototype");
+
+      std::vector<std::string> ArgNames;
+      while (getNextToken() == tok_identifier)
+        ArgNames.push_back(IdentifierStr);
+      if (CurTok != ')')
+        return ErrorP("Expected ')' in prototype");
+
+      // success.
+      getNextToken();  // eat ')'.
+
+      // Verify right number of names for operator.
+      if (Kind && ArgNames.size() != Kind)
+        return ErrorP("Invalid number of operands for operator");
+
+      return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+    }
+
+    /// definition ::= 'def' prototype expression
+    static FunctionAST *ParseDefinition() {
+      getNextToken();  // eat def.
+      PrototypeAST *Proto = ParsePrototype();
+      if (Proto == 0) return 0;
+
+      if (ExprAST *E = ParseExpression())
+        return new FunctionAST(Proto, E);
+      return 0;
+    }
+
+    /// toplevelexpr ::= expression
+    static FunctionAST *ParseTopLevelExpr() {
+      if (ExprAST *E = ParseExpression()) {
+        // Make an anonymous proto.
+        PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+        return new FunctionAST(Proto, E);
+      }
+      return 0;
+    }
+
+    /// external ::= 'extern' prototype
+    static PrototypeAST *ParseExtern() {
+      getNextToken();  // eat extern.
+      return ParsePrototype();
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Code Generation
+    //===----------------------------------------------------------------------===//
+
+    static Module *TheModule;
+    static IRBuilder<> Builder(getGlobalContext());
+    static std::map<std::string, AllocaInst*> NamedValues;
+    static FunctionPassManager *TheFPM;
+
+    Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+    /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
+    /// the function.  This is used for mutable variables etc.
+    static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+                                              const std::string &VarName) {
+      IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
+                     TheFunction->getEntryBlock().begin());
+      return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+                               VarName.c_str());
+    }
+
+    Value *NumberExprAST::Codegen() {
+      return ConstantFP::get(getGlobalContext(), APFloat(Val));
+    }
+
+    Value *VariableExprAST::Codegen() {
+      // Look this variable up in the function.
+      Value *V = NamedValues[Name];
+      if (V == 0) return ErrorV("Unknown variable name");
+
+      // Load the value.
+      return Builder.CreateLoad(V, Name.c_str());
+    }
+
+    Value *UnaryExprAST::Codegen() {
+      Value *OperandV = Operand->Codegen();
+      if (OperandV == 0) return 0;
+
+      Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+      if (F == 0)
+        return ErrorV("Unknown unary operator");
+
+      return Builder.CreateCall(F, OperandV, "unop");
+    }
+
+    Value *BinaryExprAST::Codegen() {
+      // Special case '=' because we don't want to emit the LHS as an expression.
+      if (Op == '=') {
+        // Assignment requires the LHS to be an identifier.
+        VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS);
+        if (!LHSE)
+          return ErrorV("destination of '=' must be a variable");
+        // Codegen the RHS.
+        Value *Val = RHS->Codegen();
+        if (Val == 0) return 0;
+
+        // Look up the name.
+        Value *Variable = NamedValues[LHSE->getName()];
+        if (Variable == 0) return ErrorV("Unknown variable name");
+
+        Builder.CreateStore(Val, Variable);
+        return Val;
+      }
+
+      Value *L = LHS->Codegen();
+      Value *R = RHS->Codegen();
+      if (L == 0 || R == 0) return 0;
+
+      switch (Op) {
+      case '+': return Builder.CreateFAdd(L, R, "addtmp");
+      case '-': return Builder.CreateFSub(L, R, "subtmp");
+      case '*': return Builder.CreateFMul(L, R, "multmp");
+      case '<':
+        L = Builder.CreateFCmpULT(L, R, "cmptmp");
+        // Convert bool 0/1 to double 0.0 or 1.0
+        return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                    "booltmp");
+      default: break;
+      }
+
+      // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+      // a call to it.
+      Function *F = TheModule->getFunction(std::string("binary")+Op);
+      assert(F && "binary operator not found!");
+
+      Value *Ops[2] = { L, R };
+      return Builder.CreateCall(F, Ops, "binop");
+    }
+
+    Value *CallExprAST::Codegen() {
+      // Look up the name in the global module table.
+      Function *CalleeF = TheModule->getFunction(Callee);
+      if (CalleeF == 0)
+        return ErrorV("Unknown function referenced");
+
+      // If argument mismatch error.
+      if (CalleeF->arg_size() != Args.size())
+        return ErrorV("Incorrect # arguments passed");
+
+      std::vector<Value*> ArgsV;
+      for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+        ArgsV.push_back(Args[i]->Codegen());
+        if (ArgsV.back() == 0) return 0;
+      }
+
+      return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
+    }
+
+    Value *IfExprAST::Codegen() {
+      Value *CondV = Cond->Codegen();
+      if (CondV == 0) return 0;
+
+      // Convert condition to a bool by comparing equal to 0.0.
+      CondV = Builder.CreateFCmpONE(CondV,
+                                  ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                    "ifcond");
+
+      Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+      // Create blocks for the then and else cases.  Insert the 'then' block at the
+      // end of the function.
+      BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+      BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+      BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+      Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+      // Emit then value.
+      Builder.SetInsertPoint(ThenBB);
+
+      Value *ThenV = Then->Codegen();
+      if (ThenV == 0) return 0;
+
+      Builder.CreateBr(MergeBB);
+      // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+      ThenBB = Builder.GetInsertBlock();
+
+      // Emit else block.
+      TheFunction->getBasicBlockList().push_back(ElseBB);
+      Builder.SetInsertPoint(ElseBB);
+
+      Value *ElseV = Else->Codegen();
+      if (ElseV == 0) return 0;
+
+      Builder.CreateBr(MergeBB);
+      // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+      ElseBB = Builder.GetInsertBlock();
+
+      // Emit merge block.
+      TheFunction->getBasicBlockList().push_back(MergeBB);
+      Builder.SetInsertPoint(MergeBB);
+      PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
+                                      "iftmp");
+
+      PN->addIncoming(ThenV, ThenBB);
+      PN->addIncoming(ElseV, ElseBB);
+      return PN;
+    }
+
+    Value *ForExprAST::Codegen() {
+      // Output this as:
+      //   var = alloca double
+      //   ...
+      //   start = startexpr
+      //   store start -> var
+      //   goto loop
+      // loop:
+      //   ...
+      //   bodyexpr
+      //   ...
+      // loopend:
+      //   step = stepexpr
+      //   endcond = endexpr
+      //
+      //   curvar = load var
+      //   nextvar = curvar + step
+      //   store nextvar -> var
+      //   br endcond, loop, endloop
+      // outloop:
+
+      Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+      // Create an alloca for the variable in the entry block.
+      AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+
+      // Emit the start code first, without 'variable' in scope.
+      Value *StartVal = Start->Codegen();
+      if (StartVal == 0) return 0;
+
+      // Store the value into the alloca.
+      Builder.CreateStore(StartVal, Alloca);
+
+      // Make the new basic block for the loop header, inserting after current
+      // block.
+      BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+      // Insert an explicit fall through from the current block to the LoopBB.
+      Builder.CreateBr(LoopBB);
+
+      // Start insertion in LoopBB.
+      Builder.SetInsertPoint(LoopBB);
+
+      // Within the loop, the variable is defined equal to the PHI node.  If it
+      // shadows an existing variable, we have to restore it, so save it now.
+      AllocaInst *OldVal = NamedValues[VarName];
+      NamedValues[VarName] = Alloca;
+
+      // Emit the body of the loop.  This, like any other expr, can change the
+      // current BB.  Note that we ignore the value computed by the body, but don't
+      // allow an error.
+      if (Body->Codegen() == 0)
+        return 0;
+
+      // Emit the step value.
+      Value *StepVal;
+      if (Step) {
+        StepVal = Step->Codegen();
+        if (StepVal == 0) return 0;
+      } else {
+        // If not specified, use 1.0.
+        StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+      }
+
+      // Compute the end condition.
+      Value *EndCond = End->Codegen();
+      if (EndCond == 0) return EndCond;
+
+      // Reload, increment, and restore the alloca.  This handles the case where
+      // the body of the loop mutates the variable.
+      Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
+      Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
+      Builder.CreateStore(NextVar, Alloca);
+
+      // Convert condition to a bool by comparing equal to 0.0.
+      EndCond = Builder.CreateFCmpONE(EndCond,
+                                  ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                      "loopcond");
+
+      // Create the "after loop" block and insert it.
+      BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+      // Insert the conditional branch into the end of LoopEndBB.
+      Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+      // Any new code will be inserted in AfterBB.
+      Builder.SetInsertPoint(AfterBB);
+
+      // Restore the unshadowed variable.
+      if (OldVal)
+        NamedValues[VarName] = OldVal;
+      else
+        NamedValues.erase(VarName);
+
+
+      // for expr always returns 0.0.
+      return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+    }
+
+    Value *VarExprAST::Codegen() {
+      std::vector<AllocaInst *> OldBindings;
+
+      Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+      // Register all variables and emit their initializer.
+      for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
+        const std::string &VarName = VarNames[i].first;
+        ExprAST *Init = VarNames[i].second;
+
+        // Emit the initializer before adding the variable to scope, this prevents
+        // the initializer from referencing the variable itself, and permits stuff
+        // like this:
+        //  var a = 1 in
+        //    var a = a in ...   # refers to outer 'a'.
+        Value *InitVal;
+        if (Init) {
+          InitVal = Init->Codegen();
+          if (InitVal == 0) return 0;
+        } else { // If not specified, use 0.0.
+          InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
+        }
+
+        AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+        Builder.CreateStore(InitVal, Alloca);
+
+        // Remember the old variable binding so that we can restore the binding when
+        // we unrecurse.
+        OldBindings.push_back(NamedValues[VarName]);
+
+        // Remember this binding.
+        NamedValues[VarName] = Alloca;
+      }
+
+      // Codegen the body, now that all vars are in scope.
+      Value *BodyVal = Body->Codegen();
+      if (BodyVal == 0) return 0;
+
+      // Pop all our variables from scope.
+      for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
+        NamedValues[VarNames[i].first] = OldBindings[i];
+
+      // Return the body computation.
+      return BodyVal;
+    }
+
+    Function *PrototypeAST::Codegen() {
+      // Make the function type:  double(double,double) etc.
+      std::vector<Type*> Doubles(Args.size(),
+                                 Type::getDoubleTy(getGlobalContext()));
+      FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                           Doubles, false);
+
+      Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+      // If F conflicted, there was already something named 'Name'.  If it has a
+      // body, don't allow redefinition or reextern.
+      if (F->getName() != Name) {
+        // Delete the one we just made and get the existing one.
+        F->eraseFromParent();
+        F = TheModule->getFunction(Name);
+
+        // If F already has a body, reject this.
+        if (!F->empty()) {
+          ErrorF("redefinition of function");
+          return 0;
+        }
+
+        // If F took a different number of args, reject.
+        if (F->arg_size() != Args.size()) {
+          ErrorF("redefinition of function with different # args");
+          return 0;
+        }
+      }
+
+      // Set names for all arguments.
+      unsigned Idx = 0;
+      for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+           ++AI, ++Idx)
+        AI->setName(Args[Idx]);
+
+      return F;
+    }
+
+    /// CreateArgumentAllocas - Create an alloca for each argument and register the
+    /// argument in the symbol table so that references to it will succeed.
+    void PrototypeAST::CreateArgumentAllocas(Function *F) {
+      Function::arg_iterator AI = F->arg_begin();
+      for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
+        // Create an alloca for this variable.
+        AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
+
+        // Store the initial value into the alloca.
+        Builder.CreateStore(AI, Alloca);
+
+        // Add arguments to variable symbol table.
+        NamedValues[Args[Idx]] = Alloca;
+      }
+    }
+
+    Function *FunctionAST::Codegen() {
+      NamedValues.clear();
+
+      Function *TheFunction = Proto->Codegen();
+      if (TheFunction == 0)
+        return 0;
+
+      // If this is an operator, install it.
+      if (Proto->isBinaryOp())
+        BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+
+      // Create a new basic block to start insertion into.
+      BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+      Builder.SetInsertPoint(BB);
+
+      // Add all arguments to the symbol table and create their allocas.
+      Proto->CreateArgumentAllocas(TheFunction);
+
+      if (Value *RetVal = Body->Codegen()) {
+        // Finish off the function.
+        Builder.CreateRet(RetVal);
+
+        // Validate the generated code, checking for consistency.
+        verifyFunction(*TheFunction);
+
+        // Optimize the function.
+        TheFPM->run(*TheFunction);
+
+        return TheFunction;
+      }
+
+      // Error reading body, remove function.
+      TheFunction->eraseFromParent();
+
+      if (Proto->isBinaryOp())
+        BinopPrecedence.erase(Proto->getOperatorName());
+      return 0;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Top-Level parsing and JIT Driver
+    //===----------------------------------------------------------------------===//
+
+    static ExecutionEngine *TheExecutionEngine;
+
+    static void HandleDefinition() {
+      if (FunctionAST *F = ParseDefinition()) {
+        if (Function *LF = F->Codegen()) {
+          fprintf(stderr, "Read function definition:");
+          LF->dump();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleExtern() {
+      if (PrototypeAST *P = ParseExtern()) {
+        if (Function *F = P->Codegen()) {
+          fprintf(stderr, "Read extern: ");
+          F->dump();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleTopLevelExpression() {
+      // Evaluate a top-level expression into an anonymous function.
+      if (FunctionAST *F = ParseTopLevelExpr()) {
+        if (Function *LF = F->Codegen()) {
+          // JIT the function, returning a function pointer.
+          void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+          // Cast it to the right type (takes no arguments, returns a double) so we
+          // can call it as a native function.
+          double (*FP)() = (double (*)())(intptr_t)FPtr;
+          fprintf(stderr, "Evaluated to %f\n", FP());
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    /// top ::= definition | external | expression | ';'
+    static void MainLoop() {
+      while (1) {
+        fprintf(stderr, "ready> ");
+        switch (CurTok) {
+        case tok_eof:    return;
+        case ';':        getNextToken(); break;  // ignore top-level semicolons.
+        case tok_def:    HandleDefinition(); break;
+        case tok_extern: HandleExtern(); break;
+        default:         HandleTopLevelExpression(); break;
+        }
+      }
+    }
+
+    //===----------------------------------------------------------------------===//
+    // "Library" functions that can be "extern'd" from user code.
+    //===----------------------------------------------------------------------===//
+
+    /// putchard - putchar that takes a double and returns 0.
+    extern "C"
+    double putchard(double X) {
+      putchar((char)X);
+      return 0;
+    }
+
+    /// printd - printf that takes a double prints it as "%f\n", returning 0.
+    extern "C"
+    double printd(double X) {
+      printf("%f\n", X);
+      return 0;
+    }
+
+    //===----------------------------------------------------------------------===//
+    // Main driver code.
+    //===----------------------------------------------------------------------===//
+
+    int main() {
+      InitializeNativeTarget();
+      LLVMContext &Context = getGlobalContext();
+
+      // Install standard binary operators.
+      // 1 is lowest precedence.
+      BinopPrecedence['='] = 2;
+      BinopPrecedence['<'] = 10;
+      BinopPrecedence['+'] = 20;
+      BinopPrecedence['-'] = 20;
+      BinopPrecedence['*'] = 40;  // highest.
+
+      // Prime the first token.
+      fprintf(stderr, "ready> ");
+      getNextToken();
+
+      // Make the module, which holds all the code.
+      TheModule = new Module("my cool jit", Context);
+
+      // Create the JIT.  This takes ownership of the module.
+      std::string ErrStr;
+      TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+      if (!TheExecutionEngine) {
+        fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+        exit(1);
+      }
+
+      FunctionPassManager OurFPM(TheModule);
+
+      // Set up the optimizer pipeline.  Start with registering info about how the
+      // target lays out data structures.
+      OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
+      // Provide basic AliasAnalysis support for GVN.
+      OurFPM.add(createBasicAliasAnalysisPass());
+      // Promote allocas to registers.
+      OurFPM.add(createPromoteMemoryToRegisterPass());
+      // Do simple "peephole" optimizations and bit-twiddling optzns.
+      OurFPM.add(createInstructionCombiningPass());
+      // Reassociate expressions.
+      OurFPM.add(createReassociatePass());
+      // Eliminate Common SubExpressions.
+      OurFPM.add(createGVNPass());
+      // Simplify the control flow graph (deleting unreachable blocks, etc).
+      OurFPM.add(createCFGSimplificationPass());
+
+      OurFPM.doInitialization();
+
+      // Set the global so the code gen can use this.
+      TheFPM = &OurFPM;
+
+      // Run the main "interpreter loop" now.
+      MainLoop();
+
+      TheFPM = 0;
+
+      // Print out all of the generated code.
+      TheModule->dump();
+
+      return 0;
+    }
+
+`Next: Conclusion and other useful LLVM tidbits <LangImpl8.html>`_
+
diff --git a/docs/tutorial/LangImpl8.html b/docs/tutorial/LangImpl8.html
deleted file mode 100644
index 7c1a500a21..0000000000
--- a/docs/tutorial/LangImpl8.html
+++ /dev/null
@@ -1,359 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Conclusion and other useful LLVM tidbits</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Conclusion and other useful LLVM tidbits</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 8
-  <ol>
-    <li><a href="#conclusion">Tutorial Conclusion</a></li>
-    <li><a href="#llvmirproperties">Properties of LLVM IR</a>
-    <ul>
-      <li><a href="#targetindep">Target Independence</a></li>
-      <li><a href="#safety">Safety Guarantees</a></li>
-      <li><a href="#langspecific">Language-Specific Optimizations</a></li>
-    </ul>
-    </li>
-    <li><a href="#tipsandtricks">Tips and Tricks</a>
-    <ul>
-      <li><a href="#offsetofsizeof">Implementing portable 
-                                    offsetof/sizeof</a></li>
-      <li><a href="#gcstack">Garbage Collected Stack Frames</a></li>
-    </ul>
-    </li>
-  </ol>
-</li>
-</ul>
-
-
-<div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="conclusion">Tutorial Conclusion</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to the final chapter of the "<a href="index.html">Implementing a
-language with LLVM</a>" tutorial.  In the course of this tutorial, we have grown
-our little Kaleidoscope language from being a useless toy, to being a
-semi-interesting (but probably still useless) toy. :)</p>
-
-<p>It is interesting to see how far we've come, and how little code it has
-taken.  We built the entire lexer, parser, AST, code generator, and an 
-interactive run-loop (with a JIT!) by-hand in under 700 lines of
-(non-comment/non-blank) code.</p>
-
-<p>Our little language supports a couple of interesting features: it supports
-user defined binary and unary operators, it uses JIT compilation for immediate
-evaluation, and it supports a few control flow constructs with SSA construction.
-</p>
-
-<p>Part of the idea of this tutorial was to show you how easy and fun it can be
-to define, build, and play with languages.  Building a compiler need not be a
-scary or mystical process!  Now that you've seen some of the basics, I strongly
-encourage you to take the code and hack on it.  For example, try adding:</p>
-
-<ul>
-<li><b>global variables</b> - While global variables have questional value in
-modern software engineering, they are often useful when putting together quick
-little hacks like the Kaleidoscope compiler itself.  Fortunately, our current
-setup makes it very easy to add global variables: just have value lookup check
-to see if an unresolved variable is in the global variable symbol table before
-rejecting it.  To create a new global variable, make an instance of the LLVM
-<tt>GlobalVariable</tt> class.</li>
-
-<li><b>typed variables</b> - Kaleidoscope currently only supports variables of
-type double.  This gives the language a very nice elegance, because only
-supporting one type means that you never have to specify types.  Different
-languages have different ways of handling this.  The easiest way is to require
-the user to specify types for every variable definition, and record the type
-of the variable in the symbol table along with its Value*.</li>
-
-<li><b>arrays, structs, vectors, etc</b> - Once you add types, you can start
-extending the type system in all sorts of interesting ways.  Simple arrays are
-very easy and are quite useful for many different applications.  Adding them is
-mostly an exercise in learning how the LLVM <a 
-href="../LangRef.html#i_getelementptr">getelementptr</a> instruction works: it
-is so nifty/unconventional, it <a 
-href="../GetElementPtr.html">has its own FAQ</a>!  If you add support
-for recursive types (e.g. linked lists), make sure to read the <a 
-href="../ProgrammersManual.html#TypeResolve">section in the LLVM
-Programmer's Manual</a> that describes how to construct them.</li>
-
-<li><b>standard runtime</b> - Our current language allows the user to access
-arbitrary external functions, and we use it for things like "printd" and
-"putchard".  As you extend the language to add higher-level constructs, often
-these constructs make the most sense if they are lowered to calls into a
-language-supplied runtime.  For example, if you add hash tables to the language,
-it would probably make sense to add the routines to a runtime, instead of 
-inlining them all the way.</li>
-
-<li><b>memory management</b> - Currently we can only access the stack in
-Kaleidoscope.  It would also be useful to be able to allocate heap memory,
-either with calls to the standard libc malloc/free interface or with a garbage
-collector.  If you would like to use garbage collection, note that LLVM fully
-supports <a href="../GarbageCollection.html">Accurate Garbage Collection</a>
-including algorithms that move objects and need to scan/update the stack.</li>
-
-<li><b>debugger support</b> - LLVM supports generation of <a 
-href="../SourceLevelDebugging.html">DWARF Debug info</a> which is understood by
-common debuggers like GDB.  Adding support for debug info is fairly 
-straightforward.  The best way to understand it is to compile some C/C++ code
-with "<tt>llvm-gcc -g -O0</tt>" and taking a look at what it produces.</li>
-
-<li><b>exception handling support</b> - LLVM supports generation of <a 
-href="../ExceptionHandling.html">zero cost exceptions</a> which interoperate
-with code compiled in other languages.  You could also generate code by
-implicitly making every function return an error value and checking it.  You 
-could also make explicit use of setjmp/longjmp.  There are many different ways
-to go here.</li>
-
-<li><b>object orientation, generics, database access, complex numbers,
-geometric programming, ...</b> - Really, there is
-no end of crazy features that you can add to the language.</li>
-
-<li><b>unusual domains</b> - We've been talking about applying LLVM to a domain
-that many people are interested in: building a compiler for a specific language.
-However, there are many other domains that can use compiler technology that are
-not typically considered.  For example, LLVM has been used to implement OpenGL
-graphics acceleration, translate C++ code to ActionScript, and many other
-cute and clever things.  Maybe you will be the first to JIT compile a regular
-expression interpreter into native code with LLVM?</li>
-
-</ul>
-
-<p>
-Have fun - try doing something crazy and unusual.  Building a language like
-everyone else always has, is much less fun than trying something a little crazy
-or off the wall and seeing how it turns out.  If you get stuck or want to talk
-about it, feel free to email the <a 
-href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev mailing 
-list</a>: it has lots of people who are interested in languages and are often
-willing to help out.
-</p>
-
-<p>Before we end this tutorial, I want to talk about some "tips and tricks" for generating
-LLVM IR.  These are some of the more subtle things that may not be obvious, but
-are very useful if you want to take advantage of LLVM's capabilities.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="llvmirproperties">Properties of the LLVM IR</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>We have a couple common questions about code in the LLVM IR form - lets just
-get these out of the way right now, shall we?</p>
-
-<!-- ======================================================================= -->
-<h4><a name="targetindep">Target Independence</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Kaleidoscope is an example of a "portable language": any program written in
-Kaleidoscope will work the same way on any target that it runs on.  Many other
-languages have this property, e.g. lisp, java, haskell, javascript, python, etc
-(note that while these languages are portable, not all their libraries are).</p>
-
-<p>One nice aspect of LLVM is that it is often capable of preserving target
-independence in the IR: you can take the LLVM IR for a Kaleidoscope-compiled 
-program and run it on any target that LLVM supports, even emitting C code and
-compiling that on targets that LLVM doesn't support natively.  You can trivially
-tell that the Kaleidoscope compiler generates target-independent code because it
-never queries for any target-specific information when generating code.</p>
-
-<p>The fact that LLVM provides a compact, target-independent, representation for
-code gets a lot of people excited.  Unfortunately, these people are usually
-thinking about C or a language from the C family when they are asking questions
-about language portability.  I say "unfortunately", because there is really no
-way to make (fully general) C code portable, other than shipping the source code
-around (and of course, C source code is not actually portable in general
-either - ever port a really old application from 32- to 64-bits?).</p>
-
-<p>The problem with C (again, in its full generality) is that it is heavily
-laden with target specific assumptions.  As one simple example, the preprocessor
-often destructively removes target-independence from the code when it processes
-the input text:</p>
-
-<div class="doc_code">
-<pre>
-#ifdef __i386__
-  int X = 1;
-#else
-  int X = 42;
-#endif
-</pre>
-</div>
-
-<p>While it is possible to engineer more and more complex solutions to problems
-like this, it cannot be solved in full generality in a way that is better than shipping
-the actual source code.</p>
-
-<p>That said, there are interesting subsets of C that can be made portable.  If
-you are willing to fix primitive types to a fixed size (say int = 32-bits, 
-and long = 64-bits), don't care about ABI compatibility with existing binaries,
-and are willing to give up some other minor features, you can have portable
-code.  This can make sense for specialized domains such as an
-in-kernel language.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="safety">Safety Guarantees</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Many of the languages above are also "safe" languages: it is impossible for
-a program written in Java to corrupt its address space and crash the process
-(assuming the JVM has no bugs).
-Safety is an interesting property that requires a combination of language
-design, runtime support, and often operating system support.</p>
-
-<p>It is certainly possible to implement a safe language in LLVM, but LLVM IR
-does not itself guarantee safety.  The LLVM IR allows unsafe pointer casts,
-use after free bugs, buffer over-runs, and a variety of other problems.  Safety
-needs to be implemented as a layer on top of LLVM and, conveniently, several
-groups have investigated this.  Ask on the <a 
-href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev mailing 
-list</a> if you are interested in more details.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="langspecific">Language-Specific Optimizations</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>One thing about LLVM that turns off many people is that it does not solve all
-the world's problems in one system (sorry 'world hunger', someone else will have
-to solve you some other day).  One specific complaint is that people perceive
-LLVM as being incapable of performing high-level language-specific optimization:
-LLVM "loses too much information".</p>
-
-<p>Unfortunately, this is really not the place to give you a full and unified
-version of "Chris Lattner's theory of compiler design".  Instead, I'll make a
-few observations:</p>
-
-<p>First, you're right that LLVM does lose information.  For example, as of this
-writing, there is no way to distinguish in the LLVM IR whether an SSA-value came
-from a C "int" or a C "long" on an ILP32 machine (other than debug info).  Both
-get compiled down to an 'i32' value and the information about what it came from
-is lost.  The more general issue here, is that the LLVM type system uses
-"structural equivalence" instead of "name equivalence".  Another place this
-surprises people is if you have two types in a high-level language that have the
-same structure (e.g. two different structs that have a single int field): these
-types will compile down into a single LLVM type and it will be impossible to
-tell what it came from.</p>
-
-<p>Second, while LLVM does lose information, LLVM is not a fixed target: we 
-continue to enhance and improve it in many different ways.  In addition to
-adding new features (LLVM did not always support exceptions or debug info), we
-also extend the IR to capture important information for optimization (e.g.
-whether an argument is sign or zero extended, information about pointers
-aliasing, etc).  Many of the enhancements are user-driven: people want LLVM to
-include some specific feature, so they go ahead and extend it.</p>
-
-<p>Third, it is <em>possible and easy</em> to add language-specific
-optimizations, and you have a number of choices in how to do it.  As one trivial
-example, it is easy to add language-specific optimization passes that
-"know" things about code compiled for a language.  In the case of the C family,
-there is an optimization pass that "knows" about the standard C library
-functions.  If you call "exit(0)" in main(), it knows that it is safe to
-optimize that into "return 0;" because C specifies what the 'exit'
-function does.</p>
-
-<p>In addition to simple library knowledge, it is possible to embed a variety of
-other language-specific information into the LLVM IR.  If you have a specific
-need and run into a wall, please bring the topic up on the llvmdev list.  At the
-very worst, you can always treat LLVM as if it were a "dumb code generator" and
-implement the high-level optimizations you desire in your front-end, on the
-language-specific AST.
-</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="tipsandtricks">Tips and Tricks</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>There is a variety of useful tips and tricks that you come to know after
-working on/with LLVM that aren't obvious at first glance.  Instead of letting
-everyone rediscover them, this section talks about some of these issues.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="offsetofsizeof">Implementing portable offsetof/sizeof</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>One interesting thing that comes up, if you are trying to keep the code 
-generated by your compiler "target independent", is that you often need to know
-the size of some LLVM type or the offset of some field in an llvm structure.
-For example, you might need to pass the size of a type into a function that
-allocates memory.</p>
-
-<p>Unfortunately, this can vary widely across targets: for example the width of
-a pointer is trivially target-specific.  However, there is a <a 
-href="http://nondot.org/sabre/LLVMNotes/SizeOf-OffsetOf-VariableSizedStructs.txt">clever
-way to use the getelementptr instruction</a> that allows you to compute this
-in a portable way.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="gcstack">Garbage Collected Stack Frames</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Some languages want to explicitly manage their stack frames, often so that
-they are garbage collected or to allow easy implementation of closures.  There
-are often better ways to implement these features than explicit stack frames,
-but <a 
-href="http://nondot.org/sabre/LLVMNotes/ExplicitlyManagedStackFrames.txt">LLVM
-does support them,</a> if you want.  It requires your front-end to convert the
-code into <a 
-href="http://en.wikipedia.org/wiki/Continuation-passing_style">Continuation
-Passing Style</a> and the use of tail calls (which LLVM also supports).</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/LangImpl8.rst b/docs/tutorial/LangImpl8.rst
new file mode 100644
index 0000000000..4058991f19
--- /dev/null
+++ b/docs/tutorial/LangImpl8.rst
@@ -0,0 +1,269 @@
+======================================================
+Kaleidoscope: Conclusion and other useful LLVM tidbits
+======================================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_
+
+Tutorial Conclusion
+===================
+
+Welcome to the final chapter of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. In the course of this tutorial, we have
+grown our little Kaleidoscope language from being a useless toy, to
+being a semi-interesting (but probably still useless) toy. :)
+
+It is interesting to see how far we've come, and how little code it has
+taken. We built the entire lexer, parser, AST, code generator, and an
+interactive run-loop (with a JIT!) by-hand in under 700 lines of
+(non-comment/non-blank) code.
+
+Our little language supports a couple of interesting features: it
+supports user defined binary and unary operators, it uses JIT
+compilation for immediate evaluation, and it supports a few control flow
+constructs with SSA construction.
+
+Part of the idea of this tutorial was to show you how easy and fun it
+can be to define, build, and play with languages. Building a compiler
+need not be a scary or mystical process! Now that you've seen some of
+the basics, I strongly encourage you to take the code and hack on it.
+For example, try adding:
+
+-  **global variables** - While global variables have questional value
+   in modern software engineering, they are often useful when putting
+   together quick little hacks like the Kaleidoscope compiler itself.
+   Fortunately, our current setup makes it very easy to add global
+   variables: just have value lookup check to see if an unresolved
+   variable is in the global variable symbol table before rejecting it.
+   To create a new global variable, make an instance of the LLVM
+   ``GlobalVariable`` class.
+-  **typed variables** - Kaleidoscope currently only supports variables
+   of type double. This gives the language a very nice elegance, because
+   only supporting one type means that you never have to specify types.
+   Different languages have different ways of handling this. The easiest
+   way is to require the user to specify types for every variable
+   definition, and record the type of the variable in the symbol table
+   along with its Value\*.
+-  **arrays, structs, vectors, etc** - Once you add types, you can start
+   extending the type system in all sorts of interesting ways. Simple
+   arrays are very easy and are quite useful for many different
+   applications. Adding them is mostly an exercise in learning how the
+   LLVM `getelementptr <../LangRef.html#i_getelementptr>`_ instruction
+   works: it is so nifty/unconventional, it `has its own
+   FAQ <../GetElementPtr.html>`_! If you add support for recursive types
+   (e.g. linked lists), make sure to read the `section in the LLVM
+   Programmer's Manual <../ProgrammersManual.html#TypeResolve>`_ that
+   describes how to construct them.
+-  **standard runtime** - Our current language allows the user to access
+   arbitrary external functions, and we use it for things like "printd"
+   and "putchard". As you extend the language to add higher-level
+   constructs, often these constructs make the most sense if they are
+   lowered to calls into a language-supplied runtime. For example, if
+   you add hash tables to the language, it would probably make sense to
+   add the routines to a runtime, instead of inlining them all the way.
+-  **memory management** - Currently we can only access the stack in
+   Kaleidoscope. It would also be useful to be able to allocate heap
+   memory, either with calls to the standard libc malloc/free interface
+   or with a garbage collector. If you would like to use garbage
+   collection, note that LLVM fully supports `Accurate Garbage
+   Collection <../GarbageCollection.html>`_ including algorithms that
+   move objects and need to scan/update the stack.
+-  **debugger support** - LLVM supports generation of `DWARF Debug
+   info <../SourceLevelDebugging.html>`_ which is understood by common
+   debuggers like GDB. Adding support for debug info is fairly
+   straightforward. The best way to understand it is to compile some
+   C/C++ code with "``llvm-gcc -g -O0``" and taking a look at what it
+   produces.
+-  **exception handling support** - LLVM supports generation of `zero
+   cost exceptions <../ExceptionHandling.html>`_ which interoperate with
+   code compiled in other languages. You could also generate code by
+   implicitly making every function return an error value and checking
+   it. You could also make explicit use of setjmp/longjmp. There are
+   many different ways to go here.
+-  **object orientation, generics, database access, complex numbers,
+   geometric programming, ...** - Really, there is no end of crazy
+   features that you can add to the language.
+-  **unusual domains** - We've been talking about applying LLVM to a
+   domain that many people are interested in: building a compiler for a
+   specific language. However, there are many other domains that can use
+   compiler technology that are not typically considered. For example,
+   LLVM has been used to implement OpenGL graphics acceleration,
+   translate C++ code to ActionScript, and many other cute and clever
+   things. Maybe you will be the first to JIT compile a regular
+   expression interpreter into native code with LLVM?
+
+Have fun - try doing something crazy and unusual. Building a language
+like everyone else always has, is much less fun than trying something a
+little crazy or off the wall and seeing how it turns out. If you get
+stuck or want to talk about it, feel free to email the `llvmdev mailing
+list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_: it has lots
+of people who are interested in languages and are often willing to help
+out.
+
+Before we end this tutorial, I want to talk about some "tips and tricks"
+for generating LLVM IR. These are some of the more subtle things that
+may not be obvious, but are very useful if you want to take advantage of
+LLVM's capabilities.
+
+Properties of the LLVM IR
+=========================
+
+We have a couple common questions about code in the LLVM IR form - lets
+just get these out of the way right now, shall we?
+
+Target Independence
+-------------------
+
+Kaleidoscope is an example of a "portable language": any program written
+in Kaleidoscope will work the same way on any target that it runs on.
+Many other languages have this property, e.g. lisp, java, haskell,
+javascript, python, etc (note that while these languages are portable,
+not all their libraries are).
+
+One nice aspect of LLVM is that it is often capable of preserving target
+independence in the IR: you can take the LLVM IR for a
+Kaleidoscope-compiled program and run it on any target that LLVM
+supports, even emitting C code and compiling that on targets that LLVM
+doesn't support natively. You can trivially tell that the Kaleidoscope
+compiler generates target-independent code because it never queries for
+any target-specific information when generating code.
+
+The fact that LLVM provides a compact, target-independent,
+representation for code gets a lot of people excited. Unfortunately,
+these people are usually thinking about C or a language from the C
+family when they are asking questions about language portability. I say
+"unfortunately", because there is really no way to make (fully general)
+C code portable, other than shipping the source code around (and of
+course, C source code is not actually portable in general either - ever
+port a really old application from 32- to 64-bits?).
+
+The problem with C (again, in its full generality) is that it is heavily
+laden with target specific assumptions. As one simple example, the
+preprocessor often destructively removes target-independence from the
+code when it processes the input text:
+
+.. code-block:: c
+
+    #ifdef __i386__
+      int X = 1;
+    #else
+      int X = 42;
+    #endif
+
+While it is possible to engineer more and more complex solutions to
+problems like this, it cannot be solved in full generality in a way that
+is better than shipping the actual source code.
+
+That said, there are interesting subsets of C that can be made portable.
+If you are willing to fix primitive types to a fixed size (say int =
+32-bits, and long = 64-bits), don't care about ABI compatibility with
+existing binaries, and are willing to give up some other minor features,
+you can have portable code. This can make sense for specialized domains
+such as an in-kernel language.
+
+Safety Guarantees
+-----------------
+
+Many of the languages above are also "safe" languages: it is impossible
+for a program written in Java to corrupt its address space and crash the
+process (assuming the JVM has no bugs). Safety is an interesting
+property that requires a combination of language design, runtime
+support, and often operating system support.
+
+It is certainly possible to implement a safe language in LLVM, but LLVM
+IR does not itself guarantee safety. The LLVM IR allows unsafe pointer
+casts, use after free bugs, buffer over-runs, and a variety of other
+problems. Safety needs to be implemented as a layer on top of LLVM and,
+conveniently, several groups have investigated this. Ask on the `llvmdev
+mailing list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ if
+you are interested in more details.
+
+Language-Specific Optimizations
+-------------------------------
+
+One thing about LLVM that turns off many people is that it does not
+solve all the world's problems in one system (sorry 'world hunger',
+someone else will have to solve you some other day). One specific
+complaint is that people perceive LLVM as being incapable of performing
+high-level language-specific optimization: LLVM "loses too much
+information".
+
+Unfortunately, this is really not the place to give you a full and
+unified version of "Chris Lattner's theory of compiler design". Instead,
+I'll make a few observations:
+
+First, you're right that LLVM does lose information. For example, as of
+this writing, there is no way to distinguish in the LLVM IR whether an
+SSA-value came from a C "int" or a C "long" on an ILP32 machine (other
+than debug info). Both get compiled down to an 'i32' value and the
+information about what it came from is lost. The more general issue
+here, is that the LLVM type system uses "structural equivalence" instead
+of "name equivalence". Another place this surprises people is if you
+have two types in a high-level language that have the same structure
+(e.g. two different structs that have a single int field): these types
+will compile down into a single LLVM type and it will be impossible to
+tell what it came from.
+
+Second, while LLVM does lose information, LLVM is not a fixed target: we
+continue to enhance and improve it in many different ways. In addition
+to adding new features (LLVM did not always support exceptions or debug
+info), we also extend the IR to capture important information for
+optimization (e.g. whether an argument is sign or zero extended,
+information about pointers aliasing, etc). Many of the enhancements are
+user-driven: people want LLVM to include some specific feature, so they
+go ahead and extend it.
+
+Third, it is *possible and easy* to add language-specific optimizations,
+and you have a number of choices in how to do it. As one trivial
+example, it is easy to add language-specific optimization passes that
+"know" things about code compiled for a language. In the case of the C
+family, there is an optimization pass that "knows" about the standard C
+library functions. If you call "exit(0)" in main(), it knows that it is
+safe to optimize that into "return 0;" because C specifies what the
+'exit' function does.
+
+In addition to simple library knowledge, it is possible to embed a
+variety of other language-specific information into the LLVM IR. If you
+have a specific need and run into a wall, please bring the topic up on
+the llvmdev list. At the very worst, you can always treat LLVM as if it
+were a "dumb code generator" and implement the high-level optimizations
+you desire in your front-end, on the language-specific AST.
+
+Tips and Tricks
+===============
+
+There is a variety of useful tips and tricks that you come to know after
+working on/with LLVM that aren't obvious at first glance. Instead of
+letting everyone rediscover them, this section talks about some of these
+issues.
+
+Implementing portable offsetof/sizeof
+-------------------------------------
+
+One interesting thing that comes up, if you are trying to keep the code
+generated by your compiler "target independent", is that you often need
+to know the size of some LLVM type or the offset of some field in an
+llvm structure. For example, you might need to pass the size of a type
+into a function that allocates memory.
+
+Unfortunately, this can vary widely across targets: for example the
+width of a pointer is trivially target-specific. However, there is a
+`clever way to use the getelementptr
+instruction <http://nondot.org/sabre/LLVMNotes/SizeOf-OffsetOf-VariableSizedStructs.txt>`_
+that allows you to compute this in a portable way.
+
+Garbage Collected Stack Frames
+------------------------------
+
+Some languages want to explicitly manage their stack frames, often so
+that they are garbage collected or to allow easy implementation of
+closures. There are often better ways to implement these features than
+explicit stack frames, but `LLVM does support
+them, <http://nondot.org/sabre/LLVMNotes/ExplicitlyManagedStackFrames.txt>`_
+if you want. It requires your front-end to convert the code into
+`Continuation Passing
+Style <http://en.wikipedia.org/wiki/Continuation-passing_style>`_ and
+the use of tail calls (which LLVM also supports).
+
diff --git a/docs/tutorial/OCamlLangImpl1.html b/docs/tutorial/OCamlLangImpl1.html
deleted file mode 100644
index 73fe07bb84..0000000000
--- a/docs/tutorial/OCamlLangImpl1.html
+++ /dev/null
@@ -1,365 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Tutorial Introduction and the Lexer</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <meta name="author" content="Erick Tryzelaar">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Tutorial Introduction and the Lexer</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 1
-  <ol>
-    <li><a href="#intro">Tutorial Introduction</a></li>
-    <li><a href="#language">The Basic Language</a></li>
-    <li><a href="#lexer">The Lexer</a></li>
-  </ol>
-</li>
-<li><a href="OCamlLangImpl2.html">Chapter 2</a>: Implementing a Parser and
-AST</li>
-</ul>
-
-<div class="doc_author">
-	<p>
-		Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
-		and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
-	</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Tutorial Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to the "Implementing a language with LLVM" tutorial.  This tutorial
-runs through the implementation of a simple language, showing how fun and
-easy it can be.  This tutorial will get you up and started as well as help to
-build a framework you can extend to other languages.  The code in this tutorial
-can also be used as a playground to hack on other LLVM specific things.
-</p>
-
-<p>
-The goal of this tutorial is to progressively unveil our language, describing
-how it is built up over time.  This will let us cover a fairly broad range of
-language design and LLVM-specific usage issues, showing and explaining the code
-for it all along the way, without overwhelming you with tons of details up
-front.</p>
-
-<p>It is useful to point out ahead of time that this tutorial is really about
-teaching compiler techniques and LLVM specifically, <em>not</em> about teaching
-modern and sane software engineering principles.  In practice, this means that
-we'll take a number of shortcuts to simplify the exposition.  For example, the
-code leaks memory, uses global variables all over the place, doesn't use nice
-design patterns like <a
-href="http://en.wikipedia.org/wiki/Visitor_pattern">visitors</a>, etc... but it
-is very simple.  If you dig in and use the code as a basis for future projects,
-fixing these deficiencies shouldn't be hard.</p>
-
-<p>I've tried to put this tutorial together in a way that makes chapters easy to
-skip over if you are already familiar with or are uninterested in the various
-pieces.  The structure of the tutorial is:
-</p>
-
-<ul>
-<li><b><a href="#language">Chapter #1</a>: Introduction to the Kaleidoscope
-language, and the definition of its Lexer</b> - This shows where we are going
-and the basic functionality that we want it to do.  In order to make this
-tutorial maximally understandable and hackable, we choose to implement
-everything in Objective Caml instead of using lexer and parser generators.
-LLVM obviously works just fine with such tools, feel free to use one if you
-prefer.</li>
-<li><b><a href="OCamlLangImpl2.html">Chapter #2</a>: Implementing a Parser and
-AST</b> - With the lexer in place, we can talk about parsing techniques and
-basic AST construction.  This tutorial describes recursive descent parsing and
-operator precedence parsing.  Nothing in Chapters 1 or 2 is LLVM-specific,
-the code doesn't even link in LLVM at this point. :)</li>
-<li><b><a href="OCamlLangImpl3.html">Chapter #3</a>: Code generation to LLVM
-IR</b> - With the AST ready, we can show off how easy generation of LLVM IR
-really is.</li>
-<li><b><a href="OCamlLangImpl4.html">Chapter #4</a>: Adding JIT and Optimizer
-Support</b> - Because a lot of people are interested in using LLVM as a JIT,
-we'll dive right into it and show you the 3 lines it takes to add JIT support.
-LLVM is also useful in many other ways, but this is one simple and "sexy" way
-to shows off its power. :)</li>
-<li><b><a href="OCamlLangImpl5.html">Chapter #5</a>: Extending the Language:
-Control Flow</b> - With the language up and running, we show how to extend it
-with control flow operations (if/then/else and a 'for' loop).  This gives us a
-chance to talk about simple SSA construction and control flow.</li>
-<li><b><a href="OCamlLangImpl6.html">Chapter #6</a>: Extending the Language:
-User-defined Operators</b> - This is a silly but fun chapter that talks about
-extending the language to let the user program define their own arbitrary
-unary and binary operators (with assignable precedence!).  This lets us build a
-significant piece of the "language" as library routines.</li>
-<li><b><a href="OCamlLangImpl7.html">Chapter #7</a>: Extending the Language:
-Mutable Variables</b> - This chapter talks about adding user-defined local
-variables along with an assignment operator.  The interesting part about this
-is how easy and trivial it is to construct SSA form in LLVM: no, LLVM does
-<em>not</em> require your front-end to construct SSA form!</li>
-<li><b><a href="OCamlLangImpl8.html">Chapter #8</a>: Conclusion and other
-useful LLVM tidbits</b> - This chapter wraps up the series by talking about
-potential ways to extend the language, but also includes a bunch of pointers to
-info about "special topics" like adding garbage collection support, exceptions,
-debugging, support for "spaghetti stacks", and a bunch of other tips and
-tricks.</li>
-
-</ul>
-
-<p>By the end of the tutorial, we'll have written a bit less than 700 lines of
-non-comment, non-blank, lines of code.  With this small amount of code, we'll
-have built up a very reasonable compiler for a non-trivial language including
-a hand-written lexer, parser, AST, as well as code generation support with a JIT
-compiler.  While other systems may have interesting "hello world" tutorials,
-I think the breadth of this tutorial is a great testament to the strengths of
-LLVM and why you should consider it if you're interested in language or compiler
-design.</p>
-
-<p>A note about this tutorial: we expect you to extend the language and play
-with it on your own.  Take the code and go crazy hacking away at it, compilers
-don't need to be scary creatures - it can be a lot of fun to play with
-languages!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="language">The Basic Language</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>This tutorial will be illustrated with a toy language that we'll call
-"<a href="http://en.wikipedia.org/wiki/Kaleidoscope">Kaleidoscope</a>" (derived
-from "meaning beautiful, form, and view").
-Kaleidoscope is a procedural language that allows you to define functions, use
-conditionals, math, etc.  Over the course of the tutorial, we'll extend
-Kaleidoscope to support the if/then/else construct, a for loop, user defined
-operators, JIT compilation with a simple command line interface, etc.</p>
-
-<p>Because we want to keep things simple, the only datatype in Kaleidoscope is a
-64-bit floating point type (aka 'float' in O'Caml parlance).  As such, all
-values are implicitly double precision and the language doesn't require type
-declarations.  This gives the language a very nice and simple syntax.  For
-example, the following simple example computes <a
-href="http://en.wikipedia.org/wiki/Fibonacci_number">Fibonacci numbers:</a></p>
-
-<div class="doc_code">
-<pre>
-# Compute the x'th fibonacci number.
-def fib(x)
-  if x &lt; 3 then
-    1
-  else
-    fib(x-1)+fib(x-2)
-
-# This expression will compute the 40th number.
-fib(40)
-</pre>
-</div>
-
-<p>We also allow Kaleidoscope to call into standard library functions (the LLVM
-JIT makes this completely trivial).  This means that you can use the 'extern'
-keyword to define a function before you use it (this is also useful for mutually
-recursive functions).  For example:</p>
-
-<div class="doc_code">
-<pre>
-extern sin(arg);
-extern cos(arg);
-extern atan2(arg1 arg2);
-
-atan2(sin(.4), cos(42))
-</pre>
-</div>
-
-<p>A more interesting example is included in Chapter 6 where we write a little
-Kaleidoscope application that <a href="OCamlLangImpl6.html#example">displays
-a Mandelbrot Set</a> at various levels of magnification.</p>
-
-<p>Lets dive into the implementation of this language!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="lexer">The Lexer</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>When it comes to implementing a language, the first thing needed is
-the ability to process a text file and recognize what it says.  The traditional
-way to do this is to use a "<a
-href="http://en.wikipedia.org/wiki/Lexical_analysis">lexer</a>" (aka 'scanner')
-to break the input up into "tokens".  Each token returned by the lexer includes
-a token code and potentially some metadata (e.g. the numeric value of a number).
-First, we define the possibilities:
-</p>
-
-<div class="doc_code">
-<pre>
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
-  (* commands *)
-  | Def | Extern
-
-  (* primary *)
-  | Ident of string | Number of float
-
-  (* unknown *)
-  | Kwd of char
-</pre>
-</div>
-
-<p>Each token returned by our lexer will be one of the token variant values.
-An unknown character like '+' will be returned as <tt>Token.Kwd '+'</tt>.  If
-the curr token is an identifier, the value will be <tt>Token.Ident s</tt>.  If
-the current token is a numeric literal (like 1.0), the value will be
-<tt>Token.Number 1.0</tt>.
-</p>
-
-<p>The actual implementation of the lexer is a collection of functions driven
-by a function named <tt>Lexer.lex</tt>.  The <tt>Lexer.lex</tt> function is
-called to return the next token from standard input.  We will use
-<a href="http://caml.inria.fr/pub/docs/manual-camlp4/index.html">Camlp4</a>
-to simplify the tokenization of the standard input.  Its definition starts
-as:</p>
-
-<div class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-</pre>
-</div>
-
-<p>
-<tt>Lexer.lex</tt> works by recursing over a <tt>char Stream.t</tt> to read
-characters one at a time from the standard input.  It eats them as it recognizes
-them and stores them in in a <tt>Token.token</tt> variant.  The first thing that
-it has to do is ignore whitespace between tokens.  This is accomplished with the
-recursive call above.</p>
-
-<p>The next thing <tt>Lexer.lex</tt> needs to do is recognize identifiers and
-specific keywords like "def".  Kaleidoscope does this with a pattern match
-and a helper function.<p>
-
-<div class="doc_code">
-<pre>
-  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-...
-
-and lex_ident buffer = parser
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      match Buffer.contents buffer with
-      | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
-      | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
-      | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-</pre>
-</div>
-
-<p>Numeric values are similar:</p>
-
-<div class="doc_code">
-<pre>
-  (* number: [0-9.]+ *)
-  | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-...
-
-and lex_number buffer = parser
-  | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-</pre>
-</div>
-
-<p>This is all pretty straight-forward code for processing input.  When reading
-a numeric value from input, we use the ocaml <tt>float_of_string</tt> function
-to convert it to a numeric value that we store in <tt>Token.Number</tt>.  Note
-that this isn't doing sufficient error checking: it will raise <tt>Failure</tt>
-if the string "1.23.45.67".  Feel free to extend it :).  Next we handle
-comments:
-</p>
-
-<div class="doc_code">
-<pre>
-  (* Comment until end of line. *)
-  | [&lt; ' ('#'); stream &gt;] -&gt;
-      lex_comment stream
-
-...
-
-and lex_comment = parser
-  | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
-  | [&lt; 'c; e=lex_comment &gt;] -&gt; e
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</div>
-
-<p>We handle comments by skipping to the end of the line and then return the
-next token.  Finally, if the input doesn't match one of the above cases, it is
-either an operator character like '+' or the end of the file.  These are handled
-with this code:</p>
-
-<div class="doc_code">
-<pre>
-  (* Otherwise, just return the character as its ascii value. *)
-  | [&lt; 'c; stream &gt;] -&gt;
-      [&lt; 'Token.Kwd c; lex stream &gt;]
-
-  (* end of stream. *)
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</div>
-
-<p>With this, we have the complete lexer for the basic Kaleidoscope language
-(the <a href="OCamlLangImpl2.html#code">full code listing</a> for the Lexer is
-available in the <a href="OCamlLangImpl2.html">next chapter</a> of the
-tutorial).  Next we'll <a href="OCamlLangImpl2.html">build a simple parser that
-uses this to build an Abstract Syntax Tree</a>.  When we have that, we'll
-include a driver so that you can use the lexer and parser together.
-</p>
-
-<a href="OCamlLangImpl2.html">Next: Implementing a Parser and AST</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl1.rst b/docs/tutorial/OCamlLangImpl1.rst
new file mode 100644
index 0000000000..daa482507d
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl1.rst
@@ -0,0 +1,288 @@
+=================================================
+Kaleidoscope: Tutorial Introduction and the Lexer
+=================================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_ and `Erick
+Tryzelaar <mailto:idadesub@users.sourceforge.net>`_
+
+Tutorial Introduction
+=====================
+
+Welcome to the "Implementing a language with LLVM" tutorial. This
+tutorial runs through the implementation of a simple language, showing
+how fun and easy it can be. This tutorial will get you up and started as
+well as help to build a framework you can extend to other languages. The
+code in this tutorial can also be used as a playground to hack on other
+LLVM specific things.
+
+The goal of this tutorial is to progressively unveil our language,
+describing how it is built up over time. This will let us cover a fairly
+broad range of language design and LLVM-specific usage issues, showing
+and explaining the code for it all along the way, without overwhelming
+you with tons of details up front.
+
+It is useful to point out ahead of time that this tutorial is really
+about teaching compiler techniques and LLVM specifically, *not* about
+teaching modern and sane software engineering principles. In practice,
+this means that we'll take a number of shortcuts to simplify the
+exposition. For example, the code leaks memory, uses global variables
+all over the place, doesn't use nice design patterns like
+`visitors <http://en.wikipedia.org/wiki/Visitor_pattern>`_, etc... but
+it is very simple. If you dig in and use the code as a basis for future
+projects, fixing these deficiencies shouldn't be hard.
+
+I've tried to put this tutorial together in a way that makes chapters
+easy to skip over if you are already familiar with or are uninterested
+in the various pieces. The structure of the tutorial is:
+
+-  `Chapter #1 <#language>`_: Introduction to the Kaleidoscope
+   language, and the definition of its Lexer - This shows where we are
+   going and the basic functionality that we want it to do. In order to
+   make this tutorial maximally understandable and hackable, we choose
+   to implement everything in Objective Caml instead of using lexer and
+   parser generators. LLVM obviously works just fine with such tools,
+   feel free to use one if you prefer.
+-  `Chapter #2 <OCamlLangImpl2.html>`_: Implementing a Parser and
+   AST - With the lexer in place, we can talk about parsing techniques
+   and basic AST construction. This tutorial describes recursive descent
+   parsing and operator precedence parsing. Nothing in Chapters 1 or 2
+   is LLVM-specific, the code doesn't even link in LLVM at this point.
+   :)
+-  `Chapter #3 <OCamlLangImpl3.html>`_: Code generation to LLVM IR -
+   With the AST ready, we can show off how easy generation of LLVM IR
+   really is.
+-  `Chapter #4 <OCamlLangImpl4.html>`_: Adding JIT and Optimizer
+   Support - Because a lot of people are interested in using LLVM as a
+   JIT, we'll dive right into it and show you the 3 lines it takes to
+   add JIT support. LLVM is also useful in many other ways, but this is
+   one simple and "sexy" way to shows off its power. :)
+-  `Chapter #5 <OCamlLangImpl5.html>`_: Extending the Language:
+   Control Flow - With the language up and running, we show how to
+   extend it with control flow operations (if/then/else and a 'for'
+   loop). This gives us a chance to talk about simple SSA construction
+   and control flow.
+-  `Chapter #6 <OCamlLangImpl6.html>`_: Extending the Language:
+   User-defined Operators - This is a silly but fun chapter that talks
+   about extending the language to let the user program define their own
+   arbitrary unary and binary operators (with assignable precedence!).
+   This lets us build a significant piece of the "language" as library
+   routines.
+-  `Chapter #7 <OCamlLangImpl7.html>`_: Extending the Language:
+   Mutable Variables - This chapter talks about adding user-defined
+   local variables along with an assignment operator. The interesting
+   part about this is how easy and trivial it is to construct SSA form
+   in LLVM: no, LLVM does *not* require your front-end to construct SSA
+   form!
+-  `Chapter #8 <OCamlLangImpl8.html>`_: Conclusion and other useful
+   LLVM tidbits - This chapter wraps up the series by talking about
+   potential ways to extend the language, but also includes a bunch of
+   pointers to info about "special topics" like adding garbage
+   collection support, exceptions, debugging, support for "spaghetti
+   stacks", and a bunch of other tips and tricks.
+
+By the end of the tutorial, we'll have written a bit less than 700 lines
+of non-comment, non-blank, lines of code. With this small amount of
+code, we'll have built up a very reasonable compiler for a non-trivial
+language including a hand-written lexer, parser, AST, as well as code
+generation support with a JIT compiler. While other systems may have
+interesting "hello world" tutorials, I think the breadth of this
+tutorial is a great testament to the strengths of LLVM and why you
+should consider it if you're interested in language or compiler design.
+
+A note about this tutorial: we expect you to extend the language and
+play with it on your own. Take the code and go crazy hacking away at it,
+compilers don't need to be scary creatures - it can be a lot of fun to
+play with languages!
+
+The Basic Language
+==================
+
+This tutorial will be illustrated with a toy language that we'll call
+"`Kaleidoscope <http://en.wikipedia.org/wiki/Kaleidoscope>`_" (derived
+from "meaning beautiful, form, and view"). Kaleidoscope is a procedural
+language that allows you to define functions, use conditionals, math,
+etc. Over the course of the tutorial, we'll extend Kaleidoscope to
+support the if/then/else construct, a for loop, user defined operators,
+JIT compilation with a simple command line interface, etc.
+
+Because we want to keep things simple, the only datatype in Kaleidoscope
+is a 64-bit floating point type (aka 'float' in O'Caml parlance). As
+such, all values are implicitly double precision and the language
+doesn't require type declarations. This gives the language a very nice
+and simple syntax. For example, the following simple example computes
+`Fibonacci numbers: <http://en.wikipedia.org/wiki/Fibonacci_number>`_
+
+::
+
+    # Compute the x'th fibonacci number.
+    def fib(x)
+      if x < 3 then
+        1
+      else
+        fib(x-1)+fib(x-2)
+
+    # This expression will compute the 40th number.
+    fib(40)
+
+We also allow Kaleidoscope to call into standard library functions (the
+LLVM JIT makes this completely trivial). This means that you can use the
+'extern' keyword to define a function before you use it (this is also
+useful for mutually recursive functions). For example:
+
+::
+
+    extern sin(arg);
+    extern cos(arg);
+    extern atan2(arg1 arg2);
+
+    atan2(sin(.4), cos(42))
+
+A more interesting example is included in Chapter 6 where we write a
+little Kaleidoscope application that `displays a Mandelbrot
+Set <OCamlLangImpl6.html#example>`_ at various levels of magnification.
+
+Lets dive into the implementation of this language!
+
+The Lexer
+=========
+
+When it comes to implementing a language, the first thing needed is the
+ability to process a text file and recognize what it says. The
+traditional way to do this is to use a
+"`lexer <http://en.wikipedia.org/wiki/Lexical_analysis>`_" (aka
+'scanner') to break the input up into "tokens". Each token returned by
+the lexer includes a token code and potentially some metadata (e.g. the
+numeric value of a number). First, we define the possibilities:
+
+.. code-block:: ocaml
+
+    (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
+     * these others for known things. *)
+    type token =
+      (* commands *)
+      | Def | Extern
+
+      (* primary *)
+      | Ident of string | Number of float
+
+      (* unknown *)
+      | Kwd of char
+
+Each token returned by our lexer will be one of the token variant
+values. An unknown character like '+' will be returned as
+``Token.Kwd '+'``. If the curr token is an identifier, the value will be
+``Token.Ident s``. If the current token is a numeric literal (like 1.0),
+the value will be ``Token.Number 1.0``.
+
+The actual implementation of the lexer is a collection of functions
+driven by a function named ``Lexer.lex``. The ``Lexer.lex`` function is
+called to return the next token from standard input. We will use
+`Camlp4 <http://caml.inria.fr/pub/docs/manual-camlp4/index.html>`_ to
+simplify the tokenization of the standard input. Its definition starts
+as:
+
+.. code-block:: ocaml
+
+    (*===----------------------------------------------------------------------===
+     * Lexer
+     *===----------------------------------------------------------------------===*)
+
+    let rec lex = parser
+      (* Skip any whitespace. *)
+      | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+``Lexer.lex`` works by recursing over a ``char Stream.t`` to read
+characters one at a time from the standard input. It eats them as it
+recognizes them and stores them in in a ``Token.token`` variant. The
+first thing that it has to do is ignore whitespace between tokens. This
+is accomplished with the recursive call above.
+
+The next thing ``Lexer.lex`` needs to do is recognize identifiers and
+specific keywords like "def". Kaleidoscope does this with a pattern
+match and a helper function.
+
+.. code-block:: ocaml
+
+      (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+      | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+          let buffer = Buffer.create 1 in
+          Buffer.add_char buffer c;
+          lex_ident buffer stream
+
+    ...
+
+    and lex_ident buffer = parser
+      | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+          Buffer.add_char buffer c;
+          lex_ident buffer stream
+      | [< stream=lex >] ->
+          match Buffer.contents buffer with
+          | "def" -> [< 'Token.Def; stream >]
+          | "extern" -> [< 'Token.Extern; stream >]
+          | id -> [< 'Token.Ident id; stream >]
+
+Numeric values are similar:
+
+.. code-block:: ocaml
+
+      (* number: [0-9.]+ *)
+      | [< ' ('0' .. '9' as c); stream >] ->
+          let buffer = Buffer.create 1 in
+          Buffer.add_char buffer c;
+          lex_number buffer stream
+
+    ...
+
+    and lex_number buffer = parser
+      | [< ' ('0' .. '9' | '.' as c); stream >] ->
+          Buffer.add_char buffer c;
+          lex_number buffer stream
+      | [< stream=lex >] ->
+          [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+This is all pretty straight-forward code for processing input. When
+reading a numeric value from input, we use the ocaml ``float_of_string``
+function to convert it to a numeric value that we store in
+``Token.Number``. Note that this isn't doing sufficient error checking:
+it will raise ``Failure`` if the string "1.23.45.67". Feel free to
+extend it :). Next we handle comments:
+
+.. code-block:: ocaml
+
+      (* Comment until end of line. *)
+      | [< ' ('#'); stream >] ->
+          lex_comment stream
+
+    ...
+
+    and lex_comment = parser
+      | [< ' ('\n'); stream=lex >] -> stream
+      | [< 'c; e=lex_comment >] -> e
+      | [< >] -> [< >]
+
+We handle comments by skipping to the end of the line and then return
+the next token. Finally, if the input doesn't match one of the above
+cases, it is either an operator character like '+' or the end of the
+file. These are handled with this code:
+
+.. code-block:: ocaml
+
+      (* Otherwise, just return the character as its ascii value. *)
+      | [< 'c; stream >] ->
+          [< 'Token.Kwd c; lex stream >]
+
+      (* end of stream. *)
+      | [< >] -> [< >]
+
+With this, we have the complete lexer for the basic Kaleidoscope
+language (the `full code listing <OCamlLangImpl2.html#code>`_ for the
+Lexer is available in the `next chapter <OCamlLangImpl2.html>`_ of the
+tutorial). Next we'll `build a simple parser that uses this to build an
+Abstract Syntax Tree <OCamlLangImpl2.html>`_. When we have that, we'll
+include a driver so that you can use the lexer and parser together.
+
+`Next: Implementing a Parser and AST <OCamlLangImpl2.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl2.html b/docs/tutorial/OCamlLangImpl2.html
deleted file mode 100644
index dd7e07b422..0000000000
--- a/docs/tutorial/OCamlLangImpl2.html
+++ /dev/null
@@ -1,1043 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Implementing a Parser and AST</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <meta name="author" content="Erick Tryzelaar">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Implementing a Parser and AST</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 2
-  <ol>
-    <li><a href="#intro">Chapter 2 Introduction</a></li>
-    <li><a href="#ast">The Abstract Syntax Tree (AST)</a></li>
-    <li><a href="#parserbasics">Parser Basics</a></li>
-    <li><a href="#parserprimexprs">Basic Expression Parsing</a></li>
-    <li><a href="#parserbinops">Binary Expression Parsing</a></li>
-    <li><a href="#parsertop">Parsing the Rest</a></li>
-    <li><a href="#driver">The Driver</a></li>
-    <li><a href="#conclusions">Conclusions</a></li>
-    <li><a href="#code">Full Code Listing</a></li>
-  </ol>
-</li>
-<li><a href="OCamlLangImpl3.html">Chapter 3</a>: Code generation to LLVM IR</li>
-</ul>
-
-<div class="doc_author">
-	<p>
-		Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
-		and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
-	</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 2 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 2 of the "<a href="index.html">Implementing a language
-with LLVM in Objective Caml</a>" tutorial.  This chapter shows you how to use
-the lexer, built in <a href="OCamlLangImpl1.html">Chapter 1</a>, to build a
-full <a href="http://en.wikipedia.org/wiki/Parsing">parser</a> for our
-Kaleidoscope language.  Once we have a parser, we'll define and build an <a
-href="http://en.wikipedia.org/wiki/Abstract_syntax_tree">Abstract Syntax
-Tree</a> (AST).</p>
-
-<p>The parser we will build uses a combination of <a
-href="http://en.wikipedia.org/wiki/Recursive_descent_parser">Recursive Descent
-Parsing</a> and <a href=
-"http://en.wikipedia.org/wiki/Operator-precedence_parser">Operator-Precedence
-Parsing</a> to parse the Kaleidoscope language (the latter for
-binary expressions and the former for everything else).  Before we get to
-parsing though, lets talk about the output of the parser: the Abstract Syntax
-Tree.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="ast">The Abstract Syntax Tree (AST)</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The AST for a program captures its behavior in such a way that it is easy for
-later stages of the compiler (e.g. code generation) to interpret.  We basically
-want one object for each construct in the language, and the AST should closely
-model the language.  In Kaleidoscope, we have expressions, a prototype, and a
-function object.  We'll start with expressions first:</p>
-
-<div class="doc_code">
-<pre>
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-</pre>
-</div>
-
-<p>The code above shows the definition of the base ExprAST class and one
-subclass which we use for numeric literals.  The important thing to note about
-this code is that the Number variant captures the numeric value of the
-literal as an instance variable. This allows later phases of the compiler to
-know what the stored numeric value is.</p>
-
-<p>Right now we only create the AST,  so there are no useful functions on
-them.  It would be very easy to add a function to pretty print the code,
-for example.  Here are the other expression AST node definitions that we'll use
-in the basic form of the Kaleidoscope language:
-</p>
-
-<div class="doc_code">
-<pre>
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a binary operator. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls. *)
-  | Call of string * expr array
-</pre>
-</div>
-
-<p>This is all (intentionally) rather straight-forward: variables capture the
-variable name, binary operators capture their opcode (e.g. '+'), and calls
-capture a function name as well as a list of any argument expressions.  One thing
-that is nice about our AST is that it captures the language features without
-talking about the syntax of the language.  Note that there is no discussion about
-precedence of binary operators, lexical structure, etc.</p>
-
-<p>For our basic language, these are all of the expression nodes we'll define.
-Because it doesn't have conditional control flow, it isn't Turing-complete;
-we'll fix that in a later installment.  The two things we need next are a way
-to talk about the interface to a function, and a way to talk about functions
-themselves:</p>
-
-<div class="doc_code">
-<pre>
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</div>
-
-<p>In Kaleidoscope, functions are typed with just a count of their arguments.
-Since all values are double precision floating point, the type of each argument
-doesn't need to be stored anywhere.  In a more aggressive and realistic
-language, the "expr" variants would probably have a type field.</p>
-
-<p>With this scaffolding, we can now talk about parsing expressions and function
-bodies in Kaleidoscope.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parserbasics">Parser Basics</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we have an AST to build, we need to define the parser code to build
-it.  The idea here is that we want to parse something like "x+y" (which is
-returned as three tokens by the lexer) into an AST that could be generated with
-calls like this:</p>
-
-<div class="doc_code">
-<pre>
-  let x = Variable "x" in
-  let y = Variable "y" in
-  let result = Binary ('+', x, y) in
-  ...
-</pre>
-</div>
-
-<p>
-The error handling routines make use of the builtin <tt>Stream.Failure</tt> and
-<tt>Stream.Error</tt>s.  <tt>Stream.Failure</tt> is raised when the parser is
-unable to find any matching token in the first position of a pattern.
-<tt>Stream.Error</tt> is raised when the first token matches, but the rest do
-not.  The error recovery in our parser will not be the best and is not
-particular user-friendly, but it will be enough for our tutorial.  These
-exceptions make it easier to handle errors in routines that have various return
-types.</p>
-
-<p>With these basic types and exceptions, we can implement the first
-piece of our grammar: numeric literals.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parserprimexprs">Basic Expression Parsing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>We start with numeric literals, because they are the simplest to process.
-For each production in our grammar, we'll define a function which parses that
-production.  We call this class of expressions "primary" expressions, for
-reasons that will become more clear <a href="OCamlLangImpl6.html#unary">
-later in the tutorial</a>.  In order to parse an arbitrary primary expression,
-we need to determine what sort of expression it is.  For numeric literals, we
-have:</p>
-
-<div class="doc_code">
-<pre>
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr *)
-parse_primary = parser
-  (* numberexpr ::= number *)
-  | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-</pre>
-</div>
-
-<p>This routine is very simple: it expects to be called when the current token
-is a <tt>Token.Number</tt> token.  It takes the current number value, creates
-a <tt>Ast.Number</tt> node, advances the lexer to the next token, and finally
-returns.</p>
-
-<p>There are some interesting aspects to this.  The most important one is that
-this routine eats all of the tokens that correspond to the production and
-returns the lexer buffer with the next token (which is not part of the grammar
-production) ready to go.  This is a fairly standard way to go for recursive
-descent parsers.  For a better example, the parenthesis operator is defined like
-this:</p>
-
-<div class="doc_code">
-<pre>
-  (* parenexpr ::= '(' expression ')' *)
-  | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-</pre>
-</div>
-
-<p>This function illustrates a number of interesting things about the
-parser:</p>
-
-<p>
-1) It shows how we use the <tt>Stream.Error</tt> exception.  When called, this
-function expects that the current token is a '(' token, but after parsing the
-subexpression, it is possible that there is no ')' waiting.  For example, if
-the user types in "(4 x" instead of "(4)", the parser should emit an error.
-Because errors can occur, the parser needs a way to indicate that they
-happened. In our parser, we use the camlp4 shortcut syntax <tt>token ?? "parse
-error"</tt>, where if the token before the <tt>??</tt> does not match, then
-<tt>Stream.Error "parse error"</tt> will be raised.</p>
-
-<p>2) Another interesting aspect of this function is that it uses recursion by
-calling <tt>Parser.parse_primary</tt> (we will soon see that
-<tt>Parser.parse_primary</tt> can call <tt>Parser.parse_primary</tt>).  This is
-powerful because it allows us to handle recursive grammars, and keeps each
-production very simple.  Note that parentheses do not cause construction of AST
-nodes themselves.  While we could do it this way, the most important role of
-parentheses are to guide the parser and provide grouping.  Once the parser
-constructs the AST, parentheses are not needed.</p>
-
-<p>The next simple production is for handling variable references and function
-calls:</p>
-
-<div class="doc_code">
-<pre>
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [&lt; 'Token.Ident id; stream &gt;] -&gt;
-      let rec parse_args accumulator = parser
-        | [&lt; e=parse_expr; stream &gt;] -&gt;
-            begin parser
-              | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
-              | [&lt; &gt;] -&gt; e :: accumulator
-            end stream
-        | [&lt; &gt;] -&gt; accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [&lt; 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [&lt; &gt;] -&gt; Ast.Variable id
-      in
-      parse_ident id stream
-</pre>
-</div>
-
-<p>This routine follows the same style as the other routines.  (It expects to be
-called if the current token is a <tt>Token.Ident</tt> token).  It also has
-recursion and error handling.  One interesting aspect of this is that it uses
-<em>look-ahead</em> to determine if the current identifier is a stand alone
-variable reference or if it is a function call expression.  It handles this by
-checking to see if the token after the identifier is a '(' token, constructing
-either a <tt>Ast.Variable</tt> or <tt>Ast.Call</tt> node as appropriate.
-</p>
-
-<p>We finish up by raising an exception if we received a token we didn't
-expect:</p>
-
-<div class="doc_code">
-<pre>
-  | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-</pre>
-</div>
-
-<p>Now that basic expressions are handled, we need to handle binary expressions.
-They are a bit more complex.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parserbinops">Binary Expression Parsing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Binary expressions are significantly harder to parse because they are often
-ambiguous.  For example, when given the string "x+y*z", the parser can choose
-to parse it as either "(x+y)*z" or "x+(y*z)".  With common definitions from
-mathematics, we expect the later parse, because "*" (multiplication) has
-higher <em>precedence</em> than "+" (addition).</p>
-
-<p>There are many ways to handle this, but an elegant and efficient way is to
-use <a href=
-"http://en.wikipedia.org/wiki/Operator-precedence_parser">Operator-Precedence
-Parsing</a>.  This parsing technique uses the precedence of binary operators to
-guide recursion.  To start with, we need a table of precedences:</p>
-
-<div class="doc_code">
-<pre>
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-...
-
-let main () =
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '&lt;' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-  ...
-</pre>
-</div>
-
-<p>For the basic form of Kaleidoscope, we will only support 4 binary operators
-(this can obviously be extended by you, our brave and intrepid reader).  The
-<tt>Parser.precedence</tt> function returns the precedence for the current
-token, or -1 if the token is not a binary operator.  Having a <tt>Hashtbl.t</tt>
-makes it easy to add new operators and makes it clear that the algorithm doesn't
-depend on the specific operators involved, but it would be easy enough to
-eliminate the <tt>Hashtbl.t</tt> and do the comparisons in the
-<tt>Parser.precedence</tt> function.  (Or just use a fixed-size array).</p>
-
-<p>With the helper above defined, we can now start parsing binary expressions.
-The basic idea of operator precedence parsing is to break down an expression
-with potentially ambiguous binary operators into pieces.  Consider ,for example,
-the expression "a+b+(c+d)*e*f+g".  Operator precedence parsing considers this
-as a stream of primary expressions separated by binary operators.  As such,
-it will first parse the leading primary expression "a", then it will see the
-pairs [+, b] [+, (c+d)] [*, e] [*, f] and [+, g].  Note that because parentheses
-are primary expressions, the binary expression parser doesn't need to worry
-about nested subexpressions like (c+d) at all.
-</p>
-
-<p>
-To start, an expression is a primary expression potentially followed by a
-sequence of [binop,primaryexpr] pairs:</p>
-
-<div class="doc_code">
-<pre>
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [&lt; lhs=parse_primary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-</pre>
-</div>
-
-<p><tt>Parser.parse_bin_rhs</tt> is the function that parses the sequence of
-pairs for us.  It takes a precedence and a pointer to an expression for the part
-that has been parsed so far.   Note that "x" is a perfectly valid expression: As
-such, "binoprhs" is allowed to be empty, in which case it returns the expression
-that is passed into it. In our example above, the code passes the expression for
-"a" into <tt>Parser.parse_bin_rhs</tt> and the current token is "+".</p>
-
-<p>The precedence value passed into <tt>Parser.parse_bin_rhs</tt> indicates the
-<em>minimal operator precedence</em> that the function is allowed to eat.  For
-example, if the current pair stream is [+, x] and <tt>Parser.parse_bin_rhs</tt>
-is passed in a precedence of 40, it will not consume any tokens (because the
-precedence of '+' is only 20).  With this in mind, <tt>Parser.parse_bin_rhs</tt>
-starts with:</p>
-
-<div class="doc_code">
-<pre>
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec &lt; expr_prec then lhs else begin
-</pre>
-</div>
-
-<p>This code gets the precedence of the current token and checks to see if if is
-too low.  Because we defined invalid tokens to have a precedence of -1, this
-check implicitly knows that the pair-stream ends when the token stream runs out
-of binary operators.  If this check succeeds, we know that the token is a binary
-operator and that it will be included in this expression:</p>
-
-<div class="doc_code">
-<pre>
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Okay, we know this is a binop. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) -&gt;
-</pre>
-</div>
-
-<p>As such, this code eats (and remembers) the binary operator and then parses
-the primary expression that follows.  This builds up the whole pair, the first of
-which is [+, b] for the running example.</p>
-
-<p>Now that we parsed the left-hand side of an expression and one pair of the
-RHS sequence, we have to decide which way the expression associates.  In
-particular, we could have "(a+b) binop unparsed"  or "a + (b binop unparsed)".
-To determine this, we look ahead at "binop" to determine its precedence and
-compare it to BinOp's precedence (which is '+' in this case):</p>
-
-<div class="doc_code">
-<pre>
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec &lt; next_prec
-</pre>
-</div>
-
-<p>If the precedence of the binop to the right of "RHS" is lower or equal to the
-precedence of our current operator, then we know that the parentheses associate
-as "(a+b) binop ...".  In our example, the current operator is "+" and the next
-operator is "+", we know that they have the same precedence.  In this case we'll
-create the AST node for "a+b", and then continue parsing:</p>
-
-<div class="doc_code">
-<pre>
-          ... if body omitted ...
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-</pre>
-</div>
-
-<p>In our example above, this will turn "a+b+" into "(a+b)" and execute the next
-iteration of the loop, with "+" as the current token.  The code above will eat,
-remember, and parse "(c+d)" as the primary expression, which makes the
-current pair equal to [+, (c+d)].  It will then evaluate the 'if' conditional above with
-"*" as the binop to the right of the primary.  In this case, the precedence of "*" is
-higher than the precedence of "+" so the if condition will be entered.</p>
-
-<p>The critical question left here is "how can the if condition parse the right
-hand side in full"?  In particular, to build the AST correctly for our example,
-it needs to get all of "(c+d)*e*f" as the RHS expression variable.  The code to
-do this is surprisingly simple (code from the above two blocks duplicated for
-context):</p>
-
-<div class="doc_code">
-<pre>
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) -&gt;
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              if token_prec &lt; precedence c2
-              then <b>parse_bin_rhs (token_prec + 1) rhs stream</b>
-              else rhs
-          | _ -&gt; rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-</pre>
-</div>
-
-<p>At this point, we know that the binary operator to the RHS of our primary
-has higher precedence than the binop we are currently parsing.  As such, we know
-that any sequence of pairs whose operators are all higher precedence than "+"
-should be parsed together and returned as "RHS".  To do this, we recursively
-invoke the <tt>Parser.parse_bin_rhs</tt> function specifying "token_prec+1" as
-the minimum precedence required for it to continue.  In our example above, this
-will cause it to return the AST node for "(c+d)*e*f" as RHS, which is then set
-as the RHS of the '+' expression.</p>
-
-<p>Finally, on the next iteration of the while loop, the "+g" piece is parsed
-and added to the AST.  With this little bit of code (14 non-trivial lines), we
-correctly handle fully general binary expression parsing in a very elegant way.
-This was a whirlwind tour of this code, and it is somewhat subtle.  I recommend
-running through it with a few tough examples to see how it works.
-</p>
-
-<p>This wraps up handling of expressions.  At this point, we can point the
-parser at an arbitrary token stream and build an expression from it, stopping
-at the first token that is not part of the expression.  Next up we need to
-handle function definitions, etc.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="parsertop">Parsing the Rest</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The next thing missing is handling of function prototypes.  In Kaleidoscope,
-these are used both for 'extern' function declarations as well as function body
-definitions.  The code to do this is straight-forward and not very interesting
-(once you've survived expressions):
-</p>
-
-<div class="doc_code">
-<pre>
-(* prototype
- *   ::= id '(' id* ')' *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
-    | [&lt; &gt;] -&gt; accumulator
-  in
-
-  parser
-  | [&lt; 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-
-  | [&lt; &gt;] -&gt;
-      raise (Stream.Error "expected function name in prototype")
-</pre>
-</div>
-
-<p>Given this, a function definition is very simple, just a prototype plus
-an expression to implement the body:</p>
-
-<div class="doc_code">
-<pre>
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
-  | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
-      Ast.Function (p, e)
-</pre>
-</div>
-
-<p>In addition, we support 'extern' to declare functions like 'sin' and 'cos' as
-well as to support forward declaration of user functions.  These 'extern's are just
-prototypes with no body:</p>
-
-<div class="doc_code">
-<pre>
-(*  external ::= 'extern' prototype *)
-let parse_extern = parser
-  | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</div>
-
-<p>Finally, we'll also let the user type in arbitrary top-level expressions and
-evaluate them on the fly.  We will handle this by defining anonymous nullary
-(zero argument) functions for them:</p>
-
-<div class="doc_code">
-<pre>
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
-  | [&lt; e=parse_expr &gt;] -&gt;
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-</pre>
-</div>
-
-<p>Now that we have all the pieces, let's build a little driver that will let us
-actually <em>execute</em> this code we've built!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="driver">The Driver</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The driver for this simply invokes all of the parsing pieces with a top-level
-dispatch loop.  There isn't much interesting here, so I'll just include the
-top-level loop.  See <a href="#code">below</a> for full code in the "Top-Level
-Parsing" section.</p>
-
-<div class="doc_code">
-<pre>
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop stream =
-  match Stream.peek stream with
-  | None -&gt; ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') -&gt;
-      Stream.junk stream;
-      main_loop stream
-
-  | Some token -&gt;
-      begin
-        try match token with
-        | Token.Def -&gt;
-            ignore(Parser.parse_definition stream);
-            print_endline "parsed a function definition.";
-        | Token.Extern -&gt;
-            ignore(Parser.parse_extern stream);
-            print_endline "parsed an extern.";
-        | _ -&gt;
-            (* Evaluate a top-level expression into an anonymous function. *)
-            ignore(Parser.parse_toplevel stream);
-            print_endline "parsed a top-level expr";
-        with Stream.Error s -&gt;
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready&gt; "; flush stdout;
-      main_loop stream
-</pre>
-</div>
-
-<p>The most interesting part of this is that we ignore top-level semicolons.
-Why is this, you ask?  The basic reason is that if you type "4 + 5" at the
-command line, the parser doesn't know whether that is the end of what you will type
-or not.  For example, on the next line you could type "def foo..." in which case
-4+5 is the end of a top-level expression.  Alternatively you could type "* 6",
-which would continue the expression.  Having top-level semicolons allows you to
-type "4+5;", and the parser will know you are done.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="conclusions">Conclusions</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>With just under 300 lines of commented code (240 lines of non-comment,
-non-blank code), we fully defined our minimal language, including a lexer,
-parser, and AST builder.  With this done, the executable will validate
-Kaleidoscope code and tell us if it is grammatically invalid.  For
-example, here is a sample interaction:</p>
-
-<div class="doc_code">
-<pre>
-$ <b>./toy.byte</b>
-ready&gt; <b>def foo(x y) x+foo(y, 4.0);</b>
-Parsed a function definition.
-ready&gt; <b>def foo(x y) x+y y;</b>
-Parsed a function definition.
-Parsed a top-level expr
-ready&gt; <b>def foo(x y) x+y );</b>
-Parsed a function definition.
-Error: unknown token when expecting an expression
-ready&gt; <b>extern sin(a);</b>
-ready&gt; Parsed an extern
-ready&gt; <b>^D</b>
-$
-</pre>
-</div>
-
-<p>There is a lot of room for extension here.  You can define new AST nodes,
-extend the language in many ways, etc.  In the <a href="OCamlLangImpl3.html">
-next installment</a>, we will describe how to generate LLVM Intermediate
-Representation (IR) from the AST.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for this and the previous chapter.
-Note that it is fully self-contained: you don't need LLVM or any external
-libraries at all for this.  (Besides the ocaml standard libraries, of
-course.)  To build this, just compile with:</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<dl>
-<dt>_tags:</dt>
-<dd class="doc_code">
-<pre>
-&lt;{lexer,parser}.ml&gt;: use_camlp4, pp(camlp4of)
-</pre>
-</dd>
-
-<dt>token.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
-  (* commands *)
-  | Def | Extern
-
-  (* primary *)
-  | Ident of string | Number of float
-
-  (* unknown *)
-  | Kwd of char
-</pre>
-</dd>
-
-<dt>lexer.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-  (* number: [0-9.]+ *)
-  | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-  (* Comment until end of line. *)
-  | [&lt; ' ('#'); stream &gt;] -&gt;
-      lex_comment stream
-
-  (* Otherwise, just return the character as its ascii value. *)
-  | [&lt; 'c; stream &gt;] -&gt;
-      [&lt; 'Token.Kwd c; lex stream &gt;]
-
-  (* end of stream. *)
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-
-and lex_number buffer = parser
-  | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-
-and lex_ident buffer = parser
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      match Buffer.contents buffer with
-      | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
-      | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
-      | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-
-and lex_comment = parser
-  | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
-  | [&lt; 'c; e=lex_comment &gt;] -&gt; e
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</dd>
-
-<dt>ast.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a binary operator. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls. *)
-  | Call of string * expr array
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</dd>
-
-<dt>parser.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr *)
-let rec parse_primary = parser
-  (* numberexpr ::= number *)
-  | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [&lt; 'Token.Ident id; stream &gt;] -&gt;
-      let rec parse_args accumulator = parser
-        | [&lt; e=parse_expr; stream &gt;] -&gt;
-            begin parser
-              | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
-              | [&lt; &gt;] -&gt; e :: accumulator
-            end stream
-        | [&lt; &gt;] -&gt; accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [&lt; 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [&lt; &gt;] -&gt; Ast.Variable id
-      in
-      parse_ident id stream
-
-  | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec &lt; expr_prec then lhs else begin
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Parse the primary expression after the binary operator. *)
-        let rhs = parse_primary stream in
-
-        (* Okay, we know this is a binop. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) -&gt;
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec &lt; next_prec
-              then parse_bin_rhs (token_prec + 1) rhs stream
-              else rhs
-          | _ -&gt; rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-  | _ -&gt; lhs
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [&lt; lhs=parse_primary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-
-(* prototype
- *   ::= id '(' id* ')' *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
-    | [&lt; &gt;] -&gt; accumulator
-  in
-
-  parser
-  | [&lt; 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-
-  | [&lt; &gt;] -&gt;
-      raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
-  | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
-      Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
-  | [&lt; e=parse_expr &gt;] -&gt;
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-(*  external ::= 'extern' prototype *)
-let parse_extern = parser
-  | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</dd>
-
-<dt>toplevel.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop stream =
-  match Stream.peek stream with
-  | None -&gt; ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') -&gt;
-      Stream.junk stream;
-      main_loop stream
-
-  | Some token -&gt;
-      begin
-        try match token with
-        | Token.Def -&gt;
-            ignore(Parser.parse_definition stream);
-            print_endline "parsed a function definition.";
-        | Token.Extern -&gt;
-            ignore(Parser.parse_extern stream);
-            print_endline "parsed an extern.";
-        | _ -&gt;
-            (* Evaluate a top-level expression into an anonymous function. *)
-            ignore(Parser.parse_toplevel stream);
-            print_endline "parsed a top-level expr";
-        with Stream.Error s -&gt;
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready&gt; "; flush stdout;
-      main_loop stream
-</pre>
-</dd>
-
-<dt>toy.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-let main () =
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '&lt;' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-
-  (* Prime the first token. *)
-  print_string "ready&gt; "; flush stdout;
-  let stream = Lexer.lex (Stream.of_channel stdin) in
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop stream;
-;;
-
-main ()
-</pre>
-</dd>
-</dl>
-
-<a href="OCamlLangImpl3.html">Next: Implementing Code Generation to LLVM IR</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a>
-  <a href="mailto:erickt@users.sourceforge.net">Erick Tryzelaar</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl2.rst b/docs/tutorial/OCamlLangImpl2.rst
new file mode 100644
index 0000000000..07490e1f67
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl2.rst
@@ -0,0 +1,899 @@
+===========================================
+Kaleidoscope: Implementing a Parser and AST
+===========================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_ and `Erick
+Tryzelaar <mailto:idadesub@users.sourceforge.net>`_
+
+Chapter 2 Introduction
+======================
+
+Welcome to Chapter 2 of the "`Implementing a language with LLVM in
+Objective Caml <index.html>`_" tutorial. This chapter shows you how to
+use the lexer, built in `Chapter 1 <OCamlLangImpl1.html>`_, to build a
+full `parser <http://en.wikipedia.org/wiki/Parsing>`_ for our
+Kaleidoscope language. Once we have a parser, we'll define and build an
+`Abstract Syntax
+Tree <http://en.wikipedia.org/wiki/Abstract_syntax_tree>`_ (AST).
+
+The parser we will build uses a combination of `Recursive Descent
+Parsing <http://en.wikipedia.org/wiki/Recursive_descent_parser>`_ and
+`Operator-Precedence
+Parsing <http://en.wikipedia.org/wiki/Operator-precedence_parser>`_ to
+parse the Kaleidoscope language (the latter for binary expressions and
+the former for everything else). Before we get to parsing though, lets
+talk about the output of the parser: the Abstract Syntax Tree.
+
+The Abstract Syntax Tree (AST)
+==============================
+
+The AST for a program captures its behavior in such a way that it is
+easy for later stages of the compiler (e.g. code generation) to
+interpret. We basically want one object for each construct in the
+language, and the AST should closely model the language. In
+Kaleidoscope, we have expressions, a prototype, and a function object.
+We'll start with expressions first:
+
+.. code-block:: ocaml
+
+    (* expr - Base type for all expression nodes. *)
+    type expr =
+      (* variant for numeric literals like "1.0". *)
+      | Number of float
+
+The code above shows the definition of the base ExprAST class and one
+subclass which we use for numeric literals. The important thing to note
+about this code is that the Number variant captures the numeric value of
+the literal as an instance variable. This allows later phases of the
+compiler to know what the stored numeric value is.
+
+Right now we only create the AST, so there are no useful functions on
+them. It would be very easy to add a function to pretty print the code,
+for example. Here are the other expression AST node definitions that
+we'll use in the basic form of the Kaleidoscope language:
+
+.. code-block:: ocaml
+
+      (* variant for referencing a variable, like "a". *)
+      | Variable of string
+
+      (* variant for a binary operator. *)
+      | Binary of char * expr * expr
+
+      (* variant for function calls. *)
+      | Call of string * expr array
+
+This is all (intentionally) rather straight-forward: variables capture
+the variable name, binary operators capture their opcode (e.g. '+'), and
+calls capture a function name as well as a list of any argument
+expressions. One thing that is nice about our AST is that it captures
+the language features without talking about the syntax of the language.
+Note that there is no discussion about precedence of binary operators,
+lexical structure, etc.
+
+For our basic language, these are all of the expression nodes we'll
+define. Because it doesn't have conditional control flow, it isn't
+Turing-complete; we'll fix that in a later installment. The two things
+we need next are a way to talk about the interface to a function, and a
+way to talk about functions themselves:
+
+.. code-block:: ocaml
+
+    (* proto - This type represents the "prototype" for a function, which captures
+     * its name, and its argument names (thus implicitly the number of arguments the
+     * function takes). *)
+    type proto = Prototype of string * string array
+
+    (* func - This type represents a function definition itself. *)
+    type func = Function of proto * expr
+
+In Kaleidoscope, functions are typed with just a count of their
+arguments. Since all values are double precision floating point, the
+type of each argument doesn't need to be stored anywhere. In a more
+aggressive and realistic language, the "expr" variants would probably
+have a type field.
+
+With this scaffolding, we can now talk about parsing expressions and
+function bodies in Kaleidoscope.
+
+Parser Basics
+=============
+
+Now that we have an AST to build, we need to define the parser code to
+build it. The idea here is that we want to parse something like "x+y"
+(which is returned as three tokens by the lexer) into an AST that could
+be generated with calls like this:
+
+.. code-block:: ocaml
+
+      let x = Variable "x" in
+      let y = Variable "y" in
+      let result = Binary ('+', x, y) in
+      ...
+
+The error handling routines make use of the builtin ``Stream.Failure``
+and ``Stream.Error``s. ``Stream.Failure`` is raised when the parser is
+unable to find any matching token in the first position of a pattern.
+``Stream.Error`` is raised when the first token matches, but the rest do
+not. The error recovery in our parser will not be the best and is not
+particular user-friendly, but it will be enough for our tutorial. These
+exceptions make it easier to handle errors in routines that have various
+return types.
+
+With these basic types and exceptions, we can implement the first piece
+of our grammar: numeric literals.
+
+Basic Expression Parsing
+========================
+
+We start with numeric literals, because they are the simplest to
+process. For each production in our grammar, we'll define a function
+which parses that production. We call this class of expressions
+"primary" expressions, for reasons that will become more clear `later in
+the tutorial <OCamlLangImpl6.html#unary>`_. In order to parse an
+arbitrary primary expression, we need to determine what sort of
+expression it is. For numeric literals, we have:
+
+.. code-block:: ocaml
+
+    (* primary
+     *   ::= identifier
+     *   ::= numberexpr
+     *   ::= parenexpr *)
+    parse_primary = parser
+      (* numberexpr ::= number *)
+      | [< 'Token.Number n >] -> Ast.Number n
+
+This routine is very simple: it expects to be called when the current
+token is a ``Token.Number`` token. It takes the current number value,
+creates a ``Ast.Number`` node, advances the lexer to the next token, and
+finally returns.
+
+There are some interesting aspects to this. The most important one is
+that this routine eats all of the tokens that correspond to the
+production and returns the lexer buffer with the next token (which is
+not part of the grammar production) ready to go. This is a fairly
+standard way to go for recursive descent parsers. For a better example,
+the parenthesis operator is defined like this:
+
+.. code-block:: ocaml
+
+      (* parenexpr ::= '(' expression ')' *)
+      | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+This function illustrates a number of interesting things about the
+parser:
+
+1) It shows how we use the ``Stream.Error`` exception. When called, this
+function expects that the current token is a '(' token, but after
+parsing the subexpression, it is possible that there is no ')' waiting.
+For example, if the user types in "(4 x" instead of "(4)", the parser
+should emit an error. Because errors can occur, the parser needs a way
+to indicate that they happened. In our parser, we use the camlp4
+shortcut syntax ``token ?? "parse error"``, where if the token before
+the ``??`` does not match, then ``Stream.Error "parse error"`` will be
+raised.
+
+2) Another interesting aspect of this function is that it uses recursion
+by calling ``Parser.parse_primary`` (we will soon see that
+``Parser.parse_primary`` can call ``Parser.parse_primary``). This is
+powerful because it allows us to handle recursive grammars, and keeps
+each production very simple. Note that parentheses do not cause
+construction of AST nodes themselves. While we could do it this way, the
+most important role of parentheses are to guide the parser and provide
+grouping. Once the parser constructs the AST, parentheses are not
+needed.
+
+The next simple production is for handling variable references and
+function calls:
+
+.. code-block:: ocaml
+
+      (* identifierexpr
+       *   ::= identifier
+       *   ::= identifier '(' argumentexpr ')' *)
+      | [< 'Token.Ident id; stream >] ->
+          let rec parse_args accumulator = parser
+            | [< e=parse_expr; stream >] ->
+                begin parser
+                  | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+                  | [< >] -> e :: accumulator
+                end stream
+            | [< >] -> accumulator
+          in
+          let rec parse_ident id = parser
+            (* Call. *)
+            | [< 'Token.Kwd '(';
+                 args=parse_args [];
+                 'Token.Kwd ')' ?? "expected ')'">] ->
+                Ast.Call (id, Array.of_list (List.rev args))
+
+            (* Simple variable ref. *)
+            | [< >] -> Ast.Variable id
+          in
+          parse_ident id stream
+
+This routine follows the same style as the other routines. (It expects
+to be called if the current token is a ``Token.Ident`` token). It also
+has recursion and error handling. One interesting aspect of this is that
+it uses *look-ahead* to determine if the current identifier is a stand
+alone variable reference or if it is a function call expression. It
+handles this by checking to see if the token after the identifier is a
+'(' token, constructing either a ``Ast.Variable`` or ``Ast.Call`` node
+as appropriate.
+
+We finish up by raising an exception if we received a token we didn't
+expect:
+
+.. code-block:: ocaml
+
+      | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+Now that basic expressions are handled, we need to handle binary
+expressions. They are a bit more complex.
+
+Binary Expression Parsing
+=========================
+
+Binary expressions are significantly harder to parse because they are
+often ambiguous. For example, when given the string "x+y\*z", the parser
+can choose to parse it as either "(x+y)\*z" or "x+(y\*z)". With common
+definitions from mathematics, we expect the later parse, because "\*"
+(multiplication) has higher *precedence* than "+" (addition).
+
+There are many ways to handle this, but an elegant and efficient way is
+to use `Operator-Precedence
+Parsing <http://en.wikipedia.org/wiki/Operator-precedence_parser>`_.
+This parsing technique uses the precedence of binary operators to guide
+recursion. To start with, we need a table of precedences:
+
+.. code-block:: ocaml
+
+    (* binop_precedence - This holds the precedence for each binary operator that is
+     * defined *)
+    let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+    (* precedence - Get the precedence of the pending binary operator token. *)
+    let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+    ...
+
+    let main () =
+      (* Install standard binary operators.
+       * 1 is the lowest precedence. *)
+      Hashtbl.add Parser.binop_precedence '<' 10;
+      Hashtbl.add Parser.binop_precedence '+' 20;
+      Hashtbl.add Parser.binop_precedence '-' 20;
+      Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+      ...
+
+For the basic form of Kaleidoscope, we will only support 4 binary
+operators (this can obviously be extended by you, our brave and intrepid
+reader). The ``Parser.precedence`` function returns the precedence for
+the current token, or -1 if the token is not a binary operator. Having a
+``Hashtbl.t`` makes it easy to add new operators and makes it clear that
+the algorithm doesn't depend on the specific operators involved, but it
+would be easy enough to eliminate the ``Hashtbl.t`` and do the
+comparisons in the ``Parser.precedence`` function. (Or just use a
+fixed-size array).
+
+With the helper above defined, we can now start parsing binary
+expressions. The basic idea of operator precedence parsing is to break
+down an expression with potentially ambiguous binary operators into
+pieces. Consider ,for example, the expression "a+b+(c+d)\*e\*f+g".
+Operator precedence parsing considers this as a stream of primary
+expressions separated by binary operators. As such, it will first parse
+the leading primary expression "a", then it will see the pairs [+, b]
+[+, (c+d)] [\*, e] [\*, f] and [+, g]. Note that because parentheses are
+primary expressions, the binary expression parser doesn't need to worry
+about nested subexpressions like (c+d) at all.
+
+To start, an expression is a primary expression potentially followed by
+a sequence of [binop,primaryexpr] pairs:
+
+.. code-block:: ocaml
+
+    (* expression
+     *   ::= primary binoprhs *)
+    and parse_expr = parser
+      | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+``Parser.parse_bin_rhs`` is the function that parses the sequence of
+pairs for us. It takes a precedence and a pointer to an expression for
+the part that has been parsed so far. Note that "x" is a perfectly valid
+expression: As such, "binoprhs" is allowed to be empty, in which case it
+returns the expression that is passed into it. In our example above, the
+code passes the expression for "a" into ``Parser.parse_bin_rhs`` and the
+current token is "+".
+
+The precedence value passed into ``Parser.parse_bin_rhs`` indicates the
+*minimal operator precedence* that the function is allowed to eat. For
+example, if the current pair stream is [+, x] and
+``Parser.parse_bin_rhs`` is passed in a precedence of 40, it will not
+consume any tokens (because the precedence of '+' is only 20). With this
+in mind, ``Parser.parse_bin_rhs`` starts with:
+
+.. code-block:: ocaml
+
+    (* binoprhs
+     *   ::= ('+' primary)* *)
+    and parse_bin_rhs expr_prec lhs stream =
+      match Stream.peek stream with
+      (* If this is a binop, find its precedence. *)
+      | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+          let token_prec = precedence c in
+
+          (* If this is a binop that binds at least as tightly as the current binop,
+           * consume it, otherwise we are done. *)
+          if token_prec < expr_prec then lhs else begin
+
+This code gets the precedence of the current token and checks to see if
+if is too low. Because we defined invalid tokens to have a precedence of
+-1, this check implicitly knows that the pair-stream ends when the token
+stream runs out of binary operators. If this check succeeds, we know
+that the token is a binary operator and that it will be included in this
+expression:
+
+.. code-block:: ocaml
+
+            (* Eat the binop. *)
+            Stream.junk stream;
+
+            (* Okay, we know this is a binop. *)
+            let rhs =
+              match Stream.peek stream with
+              | Some (Token.Kwd c2) ->
+
+As such, this code eats (and remembers) the binary operator and then
+parses the primary expression that follows. This builds up the whole
+pair, the first of which is [+, b] for the running example.
+
+Now that we parsed the left-hand side of an expression and one pair of
+the RHS sequence, we have to decide which way the expression associates.
+In particular, we could have "(a+b) binop unparsed" or "a + (b binop
+unparsed)". To determine this, we look ahead at "binop" to determine its
+precedence and compare it to BinOp's precedence (which is '+' in this
+case):
+
+.. code-block:: ocaml
+
+                  (* If BinOp binds less tightly with rhs than the operator after
+                   * rhs, let the pending operator take rhs as its lhs. *)
+                  let next_prec = precedence c2 in
+                  if token_prec < next_prec
+
+If the precedence of the binop to the right of "RHS" is lower or equal
+to the precedence of our current operator, then we know that the
+parentheses associate as "(a+b) binop ...". In our example, the current
+operator is "+" and the next operator is "+", we know that they have the
+same precedence. In this case we'll create the AST node for "a+b", and
+then continue parsing:
+
+.. code-block:: ocaml
+
+              ... if body omitted ...
+            in
+
+            (* Merge lhs/rhs. *)
+            let lhs = Ast.Binary (c, lhs, rhs) in
+            parse_bin_rhs expr_prec lhs stream
+          end
+
+In our example above, this will turn "a+b+" into "(a+b)" and execute the
+next iteration of the loop, with "+" as the current token. The code
+above will eat, remember, and parse "(c+d)" as the primary expression,
+which makes the current pair equal to [+, (c+d)]. It will then evaluate
+the 'if' conditional above with "\*" as the binop to the right of the
+primary. In this case, the precedence of "\*" is higher than the
+precedence of "+" so the if condition will be entered.
+
+The critical question left here is "how can the if condition parse the
+right hand side in full"? In particular, to build the AST correctly for
+our example, it needs to get all of "(c+d)\*e\*f" as the RHS expression
+variable. The code to do this is surprisingly simple (code from the
+above two blocks duplicated for context):
+
+.. code-block:: ocaml
+
+              match Stream.peek stream with
+              | Some (Token.Kwd c2) ->
+                  (* If BinOp binds less tightly with rhs than the operator after
+                   * rhs, let the pending operator take rhs as its lhs. *)
+                  if token_prec < precedence c2
+                  then parse_bin_rhs (token_prec + 1) rhs stream
+                  else rhs
+              | _ -> rhs
+            in
+
+            (* Merge lhs/rhs. *)
+            let lhs = Ast.Binary (c, lhs, rhs) in
+            parse_bin_rhs expr_prec lhs stream
+          end
+
+At this point, we know that the binary operator to the RHS of our
+primary has higher precedence than the binop we are currently parsing.
+As such, we know that any sequence of pairs whose operators are all
+higher precedence than "+" should be parsed together and returned as
+"RHS". To do this, we recursively invoke the ``Parser.parse_bin_rhs``
+function specifying "token\_prec+1" as the minimum precedence required
+for it to continue. In our example above, this will cause it to return
+the AST node for "(c+d)\*e\*f" as RHS, which is then set as the RHS of
+the '+' expression.
+
+Finally, on the next iteration of the while loop, the "+g" piece is
+parsed and added to the AST. With this little bit of code (14
+non-trivial lines), we correctly handle fully general binary expression
+parsing in a very elegant way. This was a whirlwind tour of this code,
+and it is somewhat subtle. I recommend running through it with a few
+tough examples to see how it works.
+
+This wraps up handling of expressions. At this point, we can point the
+parser at an arbitrary token stream and build an expression from it,
+stopping at the first token that is not part of the expression. Next up
+we need to handle function definitions, etc.
+
+Parsing the Rest
+================
+
+The next thing missing is handling of function prototypes. In
+Kaleidoscope, these are used both for 'extern' function declarations as
+well as function body definitions. The code to do this is
+straight-forward and not very interesting (once you've survived
+expressions):
+
+.. code-block:: ocaml
+
+    (* prototype
+     *   ::= id '(' id* ')' *)
+    let parse_prototype =
+      let rec parse_args accumulator = parser
+        | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+        | [< >] -> accumulator
+      in
+
+      parser
+      | [< 'Token.Ident id;
+           'Token.Kwd '(' ?? "expected '(' in prototype";
+           args=parse_args [];
+           'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+          (* success. *)
+          Ast.Prototype (id, Array.of_list (List.rev args))
+
+      | [< >] ->
+          raise (Stream.Error "expected function name in prototype")
+
+Given this, a function definition is very simple, just a prototype plus
+an expression to implement the body:
+
+.. code-block:: ocaml
+
+    (* definition ::= 'def' prototype expression *)
+    let parse_definition = parser
+      | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+          Ast.Function (p, e)
+
+In addition, we support 'extern' to declare functions like 'sin' and
+'cos' as well as to support forward declaration of user functions. These
+'extern's are just prototypes with no body:
+
+.. code-block:: ocaml
+
+    (*  external ::= 'extern' prototype *)
+    let parse_extern = parser
+      | [< 'Token.Extern; e=parse_prototype >] -> e
+
+Finally, we'll also let the user type in arbitrary top-level expressions
+and evaluate them on the fly. We will handle this by defining anonymous
+nullary (zero argument) functions for them:
+
+.. code-block:: ocaml
+
+    (* toplevelexpr ::= expression *)
+    let parse_toplevel = parser
+      | [< e=parse_expr >] ->
+          (* Make an anonymous proto. *)
+          Ast.Function (Ast.Prototype ("", [||]), e)
+
+Now that we have all the pieces, let's build a little driver that will
+let us actually *execute* this code we've built!
+
+The Driver
+==========
+
+The driver for this simply invokes all of the parsing pieces with a
+top-level dispatch loop. There isn't much interesting here, so I'll just
+include the top-level loop. See `below <#code>`_ for full code in the
+"Top-Level Parsing" section.
+
+.. code-block:: ocaml
+
+    (* top ::= definition | external | expression | ';' *)
+    let rec main_loop stream =
+      match Stream.peek stream with
+      | None -> ()
+
+      (* ignore top-level semicolons. *)
+      | Some (Token.Kwd ';') ->
+          Stream.junk stream;
+          main_loop stream
+
+      | Some token ->
+          begin
+            try match token with
+            | Token.Def ->
+                ignore(Parser.parse_definition stream);
+                print_endline "parsed a function definition.";
+            | Token.Extern ->
+                ignore(Parser.parse_extern stream);
+                print_endline "parsed an extern.";
+            | _ ->
+                (* Evaluate a top-level expression into an anonymous function. *)
+                ignore(Parser.parse_toplevel stream);
+                print_endline "parsed a top-level expr";
+            with Stream.Error s ->
+              (* Skip token for error recovery. *)
+              Stream.junk stream;
+              print_endline s;
+          end;
+          print_string "ready> "; flush stdout;
+          main_loop stream
+
+The most interesting part of this is that we ignore top-level
+semicolons. Why is this, you ask? The basic reason is that if you type
+"4 + 5" at the command line, the parser doesn't know whether that is the
+end of what you will type or not. For example, on the next line you
+could type "def foo..." in which case 4+5 is the end of a top-level
+expression. Alternatively you could type "\* 6", which would continue
+the expression. Having top-level semicolons allows you to type "4+5;",
+and the parser will know you are done.
+
+Conclusions
+===========
+
+With just under 300 lines of commented code (240 lines of non-comment,
+non-blank code), we fully defined our minimal language, including a
+lexer, parser, and AST builder. With this done, the executable will
+validate Kaleidoscope code and tell us if it is grammatically invalid.
+For example, here is a sample interaction:
+
+.. code-block:: bash
+
+    $ ./toy.byte
+    ready> def foo(x y) x+foo(y, 4.0);
+    Parsed a function definition.
+    ready> def foo(x y) x+y y;
+    Parsed a function definition.
+    Parsed a top-level expr
+    ready> def foo(x y) x+y );
+    Parsed a function definition.
+    Error: unknown token when expecting an expression
+    ready> extern sin(a);
+    ready> Parsed an extern
+    ready> ^D
+    $
+
+There is a lot of room for extension here. You can define new AST nodes,
+extend the language in many ways, etc. In the `next
+installment <OCamlLangImpl3.html>`_, we will describe how to generate
+LLVM Intermediate Representation (IR) from the AST.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for this and the previous chapter.
+Note that it is fully self-contained: you don't need LLVM or any
+external libraries at all for this. (Besides the ocaml standard
+libraries, of course.) To build this, just compile with:
+
+.. code-block:: bash
+
+    # Compile
+    ocamlbuild toy.byte
+    # Run
+    ./toy.byte
+
+Here is the code:
+
+\_tags:
+    ::
+
+        <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+
+token.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Lexer Tokens
+         *===----------------------------------------------------------------------===*)
+
+        (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
+         * these others for known things. *)
+        type token =
+          (* commands *)
+          | Def | Extern
+
+          (* primary *)
+          | Ident of string | Number of float
+
+          (* unknown *)
+          | Kwd of char
+
+lexer.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Lexer
+         *===----------------------------------------------------------------------===*)
+
+        let rec lex = parser
+          (* Skip any whitespace. *)
+          | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+          (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+          | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+              let buffer = Buffer.create 1 in
+              Buffer.add_char buffer c;
+              lex_ident buffer stream
+
+          (* number: [0-9.]+ *)
+          | [< ' ('0' .. '9' as c); stream >] ->
+              let buffer = Buffer.create 1 in
+              Buffer.add_char buffer c;
+              lex_number buffer stream
+
+          (* Comment until end of line. *)
+          | [< ' ('#'); stream >] ->
+              lex_comment stream
+
+          (* Otherwise, just return the character as its ascii value. *)
+          | [< 'c; stream >] ->
+              [< 'Token.Kwd c; lex stream >]
+
+          (* end of stream. *)
+          | [< >] -> [< >]
+
+        and lex_number buffer = parser
+          | [< ' ('0' .. '9' | '.' as c); stream >] ->
+              Buffer.add_char buffer c;
+              lex_number buffer stream
+          | [< stream=lex >] ->
+              [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+        and lex_ident buffer = parser
+          | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+              Buffer.add_char buffer c;
+              lex_ident buffer stream
+          | [< stream=lex >] ->
+              match Buffer.contents buffer with
+              | "def" -> [< 'Token.Def; stream >]
+              | "extern" -> [< 'Token.Extern; stream >]
+              | id -> [< 'Token.Ident id; stream >]
+
+        and lex_comment = parser
+          | [< ' ('\n'); stream=lex >] -> stream
+          | [< 'c; e=lex_comment >] -> e
+          | [< >] -> [< >]
+
+ast.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Abstract Syntax Tree (aka Parse Tree)
+         *===----------------------------------------------------------------------===*)
+
+        (* expr - Base type for all expression nodes. *)
+        type expr =
+          (* variant for numeric literals like "1.0". *)
+          | Number of float
+
+          (* variant for referencing a variable, like "a". *)
+          | Variable of string
+
+          (* variant for a binary operator. *)
+          | Binary of char * expr * expr
+
+          (* variant for function calls. *)
+          | Call of string * expr array
+
+        (* proto - This type represents the "prototype" for a function, which captures
+         * its name, and its argument names (thus implicitly the number of arguments the
+         * function takes). *)
+        type proto = Prototype of string * string array
+
+        (* func - This type represents a function definition itself. *)
+        type func = Function of proto * expr
+
+parser.ml:
+    .. code-block:: ocaml
+
+        (*===---------------------------------------------------------------------===
+         * Parser
+         *===---------------------------------------------------------------------===*)
+
+        (* binop_precedence - This holds the precedence for each binary operator that is
+         * defined *)
+        let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+        (* precedence - Get the precedence of the pending binary operator token. *)
+        let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+        (* primary
+         *   ::= identifier
+         *   ::= numberexpr
+         *   ::= parenexpr *)
+        let rec parse_primary = parser
+          (* numberexpr ::= number *)
+          | [< 'Token.Number n >] -> Ast.Number n
+
+          (* parenexpr ::= '(' expression ')' *)
+          | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+          (* identifierexpr
+           *   ::= identifier
+           *   ::= identifier '(' argumentexpr ')' *)
+          | [< 'Token.Ident id; stream >] ->
+              let rec parse_args accumulator = parser
+                | [< e=parse_expr; stream >] ->
+                    begin parser
+                      | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+                      | [< >] -> e :: accumulator
+                    end stream
+                | [< >] -> accumulator
+              in
+              let rec parse_ident id = parser
+                (* Call. *)
+                | [< 'Token.Kwd '(';
+                     args=parse_args [];
+                     'Token.Kwd ')' ?? "expected ')'">] ->
+                    Ast.Call (id, Array.of_list (List.rev args))
+
+                (* Simple variable ref. *)
+                | [< >] -> Ast.Variable id
+              in
+              parse_ident id stream
+
+          | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+        (* binoprhs
+         *   ::= ('+' primary)* *)
+        and parse_bin_rhs expr_prec lhs stream =
+          match Stream.peek stream with
+          (* If this is a binop, find its precedence. *)
+          | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+              let token_prec = precedence c in
+
+              (* If this is a binop that binds at least as tightly as the current binop,
+               * consume it, otherwise we are done. *)
+              if token_prec < expr_prec then lhs else begin
+                (* Eat the binop. *)
+                Stream.junk stream;
+
+                (* Parse the primary expression after the binary operator. *)
+                let rhs = parse_primary stream in
+
+                (* Okay, we know this is a binop. *)
+                let rhs =
+                  match Stream.peek stream with
+                  | Some (Token.Kwd c2) ->
+                      (* If BinOp binds less tightly with rhs than the operator after
+                       * rhs, let the pending operator take rhs as its lhs. *)
+                      let next_prec = precedence c2 in
+                      if token_prec < next_prec
+                      then parse_bin_rhs (token_prec + 1) rhs stream
+                      else rhs
+                  | _ -> rhs
+                in
+
+                (* Merge lhs/rhs. *)
+                let lhs = Ast.Binary (c, lhs, rhs) in
+                parse_bin_rhs expr_prec lhs stream
+              end
+          | _ -> lhs
+
+        (* expression
+         *   ::= primary binoprhs *)
+        and parse_expr = parser
+          | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+        (* prototype
+         *   ::= id '(' id* ')' *)
+        let parse_prototype =
+          let rec parse_args accumulator = parser
+            | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+            | [< >] -> accumulator
+          in
+
+          parser
+          | [< 'Token.Ident id;
+               'Token.Kwd '(' ?? "expected '(' in prototype";
+               args=parse_args [];
+               'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+              (* success. *)
+              Ast.Prototype (id, Array.of_list (List.rev args))
+
+          | [< >] ->
+              raise (Stream.Error "expected function name in prototype")
+
+        (* definition ::= 'def' prototype expression *)
+        let parse_definition = parser
+          | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+              Ast.Function (p, e)
+
+        (* toplevelexpr ::= expression *)
+        let parse_toplevel = parser
+          | [< e=parse_expr >] ->
+              (* Make an anonymous proto. *)
+              Ast.Function (Ast.Prototype ("", [||]), e)
+
+        (*  external ::= 'extern' prototype *)
+        let parse_extern = parser
+          | [< 'Token.Extern; e=parse_prototype >] -> e
+
+toplevel.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Top-Level parsing and JIT Driver
+         *===----------------------------------------------------------------------===*)
+
+        (* top ::= definition | external | expression | ';' *)
+        let rec main_loop stream =
+          match Stream.peek stream with
+          | None -> ()
+
+          (* ignore top-level semicolons. *)
+          | Some (Token.Kwd ';') ->
+              Stream.junk stream;
+              main_loop stream
+
+          | Some token ->
+              begin
+                try match token with
+                | Token.Def ->
+                    ignore(Parser.parse_definition stream);
+                    print_endline "parsed a function definition.";
+                | Token.Extern ->
+                    ignore(Parser.parse_extern stream);
+                    print_endline "parsed an extern.";
+                | _ ->
+                    (* Evaluate a top-level expression into an anonymous function. *)
+                    ignore(Parser.parse_toplevel stream);
+                    print_endline "parsed a top-level expr";
+                with Stream.Error s ->
+                  (* Skip token for error recovery. *)
+                  Stream.junk stream;
+                  print_endline s;
+              end;
+              print_string "ready> "; flush stdout;
+              main_loop stream
+
+toy.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Main driver code.
+         *===----------------------------------------------------------------------===*)
+
+        let main () =
+          (* Install standard binary operators.
+           * 1 is the lowest precedence. *)
+          Hashtbl.add Parser.binop_precedence '<' 10;
+          Hashtbl.add Parser.binop_precedence '+' 20;
+          Hashtbl.add Parser.binop_precedence '-' 20;
+          Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+
+          (* Prime the first token. *)
+          print_string "ready> "; flush stdout;
+          let stream = Lexer.lex (Stream.of_channel stdin) in
+
+          (* Run the main "interpreter loop" now. *)
+          Toplevel.main_loop stream;
+        ;;
+
+        main ()
+
+`Next: Implementing Code Generation to LLVM IR <OCamlLangImpl3.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl3.html b/docs/tutorial/OCamlLangImpl3.html
deleted file mode 100644
index a49a0b5d9c..0000000000
--- a/docs/tutorial/OCamlLangImpl3.html
+++ /dev/null
@@ -1,1093 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Implementing code generation to LLVM IR</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <meta name="author" content="Erick Tryzelaar">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Code generation to LLVM IR</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 3
-  <ol>
-    <li><a href="#intro">Chapter 3 Introduction</a></li>
-    <li><a href="#basics">Code Generation Setup</a></li>
-    <li><a href="#exprs">Expression Code Generation</a></li>
-    <li><a href="#funcs">Function Code Generation</a></li>
-    <li><a href="#driver">Driver Changes and Closing Thoughts</a></li>
-    <li><a href="#code">Full Code Listing</a></li>
-  </ol>
-</li>
-<li><a href="OCamlLangImpl4.html">Chapter 4</a>: Adding JIT and Optimizer
-Support</li>
-</ul>
-
-<div class="doc_author">
-	<p>
-		Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
-		and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
-	</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 3 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 3 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial.  This chapter shows you how to transform the <a
-href="OCamlLangImpl2.html">Abstract Syntax Tree</a>, built in Chapter 2, into
-LLVM IR.  This will teach you a little bit about how LLVM does things, as well
-as demonstrate how easy it is to use.  It's much more work to build a lexer and
-parser than it is to generate LLVM IR code. :)
-</p>
-
-<p><b>Please note</b>: the code in this chapter and later require LLVM 2.3 or
-LLVM SVN to work.  LLVM 2.2 and before will not work with it.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="basics">Code Generation Setup</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-In order to generate LLVM IR, we want some simple setup to get started.  First
-we define virtual code generation (codegen) methods in each AST class:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
-  | Ast.Number n -&gt; ...
-  | Ast.Variable name -&gt; ...
-</pre>
-</div>
-
-<p>The <tt>Codegen.codegen_expr</tt> function says to emit IR for that AST node
-along with all the things it depends on, and they all return an LLVM Value
-object.  "Value" is the class used to represent a "<a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Static Single
-Assignment (SSA)</a> register" or "SSA value" in LLVM.  The most distinct aspect
-of SSA values is that their value is computed as the related instruction
-executes, and it does not get a new value until (and if) the instruction
-re-executes.  In other words, there is no way to "change" an SSA value.  For
-more information, please read up on <a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Static Single
-Assignment</a> - the concepts are really quite natural once you grok them.</p>
-
-<p>The
-second thing we want is an "Error" exception like we used for the parser, which
-will be used to report errors found during code generation (for example, use of
-an undeclared parameter):</p>
-
-<div class="doc_code">
-<pre>
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-</pre>
-</div>
-
-<p>The static variables will be used during code generation.
-<tt>Codgen.the_module</tt> is the LLVM construct that contains all of the
-functions and global variables in a chunk of code.  In many ways, it is the
-top-level structure that the LLVM IR uses to contain code.</p>
-
-<p>The <tt>Codegen.builder</tt> object is a helper object that makes it easy to
-generate LLVM instructions.  Instances of the <a
-href="http://llvm.org/doxygen/IRBuilder_8h-source.html"><tt>IRBuilder</tt></a>
-class keep track of the current place to insert instructions and has methods to
-create new instructions.</p>
-
-<p>The <tt>Codegen.named_values</tt> map keeps track of which values are defined
-in the current scope and what their LLVM representation is.  (In other words, it
-is a symbol table for the code).  In this form of Kaleidoscope, the only things
-that can be referenced are function parameters.  As such, function parameters
-will be in this map when generating code for their function body.</p>
-
-<p>
-With these basics in place, we can start talking about how to generate code for
-each expression.  Note that this assumes that the <tt>Codgen.builder</tt> has
-been set up to generate code <em>into</em> something.  For now, we'll assume
-that this has already been done, and we'll just use it to emit code.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="exprs">Expression Code Generation</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Generating LLVM code for expression nodes is very straightforward: less
-than 30 lines of commented code for all four of our expression nodes.  First
-we'll do numeric literals:</p>
-
-<div class="doc_code">
-<pre>
-  | Ast.Number n -&gt; const_float double_type n
-</pre>
-</div>
-
-<p>In the LLVM IR, numeric constants are represented with the
-<tt>ConstantFP</tt> class, which holds the numeric value in an <tt>APFloat</tt>
-internally (<tt>APFloat</tt> has the capability of holding floating point
-constants of <em>A</em>rbitrary <em>P</em>recision).  This code basically just
-creates and returns a <tt>ConstantFP</tt>.  Note that in the LLVM IR
-that constants are all uniqued together and shared.  For this reason, the API
-uses "the foo::get(..)" idiom instead of "new foo(..)" or "foo::Create(..)".</p>
-
-<div class="doc_code">
-<pre>
-  | Ast.Variable name -&gt;
-      (try Hashtbl.find named_values name with
-        | Not_found -&gt; raise (Error "unknown variable name"))
-</pre>
-</div>
-
-<p>References to variables are also quite simple using LLVM.  In the simple
-version of Kaleidoscope, we assume that the variable has already been emitted
-somewhere and its value is available.  In practice, the only values that can be
-in the <tt>Codegen.named_values</tt> map are function arguments.  This code
-simply checks to see that the specified name is in the map (if not, an unknown
-variable is being referenced) and returns the value for it.  In future chapters,
-we'll add support for <a href="LangImpl5.html#for">loop induction variables</a>
-in the symbol table, and for <a href="LangImpl7.html#localvars">local
-variables</a>.</p>
-
-<div class="doc_code">
-<pre>
-  | Ast.Binary (op, lhs, rhs) -&gt;
-      let lhs_val = codegen_expr lhs in
-      let rhs_val = codegen_expr rhs in
-      begin
-        match op with
-        | '+' -&gt; build_fadd lhs_val rhs_val "addtmp" builder
-        | '-' -&gt; build_fsub lhs_val rhs_val "subtmp" builder
-        | '*' -&gt; build_fmul lhs_val rhs_val "multmp" builder
-        | '&lt;' -&gt;
-            (* Convert bool 0/1 to double 0.0 or 1.0 *)
-            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-            build_uitofp i double_type "booltmp" builder
-        | _ -&gt; raise (Error "invalid binary operator")
-      end
-</pre>
-</div>
-
-<p>Binary operators start to get more interesting.  The basic idea here is that
-we recursively emit code for the left-hand side of the expression, then the
-right-hand side, then we compute the result of the binary expression.  In this
-code, we do a simple switch on the opcode to create the right LLVM instruction.
-</p>
-
-<p>In the example above, the LLVM builder class is starting to show its value.
-IRBuilder knows where to insert the newly created instruction, all you have to
-do is specify what instruction to create (e.g. with <tt>Llvm.create_add</tt>),
-which operands to use (<tt>lhs</tt> and <tt>rhs</tt> here) and optionally
-provide a name for the generated instruction.</p>
-
-<p>One nice thing about LLVM is that the name is just a hint.  For instance, if
-the code above emits multiple "addtmp" variables, LLVM will automatically
-provide each one with an increasing, unique numeric suffix.  Local value names
-for instructions are purely optional, but it makes it much easier to read the
-IR dumps.</p>
-
-<p><a href="../LangRef.html#instref">LLVM instructions</a> are constrained by
-strict rules: for example, the Left and Right operators of
-an <a href="../LangRef.html#i_add">add instruction</a> must have the same
-type, and the result type of the add must match the operand types.  Because
-all values in Kaleidoscope are doubles, this makes for very simple code for add,
-sub and mul.</p>
-
-<p>On the other hand, LLVM specifies that the <a
-href="../LangRef.html#i_fcmp">fcmp instruction</a> always returns an 'i1' value
-(a one bit integer).  The problem with this is that Kaleidoscope wants the value to be a 0.0 or 1.0 value.  In order to get these semantics, we combine the fcmp instruction with
-a <a href="../LangRef.html#i_uitofp">uitofp instruction</a>.  This instruction
-converts its input integer into a floating point value by treating the input
-as an unsigned value.  In contrast, if we used the <a
-href="../LangRef.html#i_sitofp">sitofp instruction</a>, the Kaleidoscope '&lt;'
-operator would return 0.0 and -1.0, depending on the input value.</p>
-
-<div class="doc_code">
-<pre>
-  | Ast.Call (callee, args) -&gt;
-      (* Look up the name in the module table. *)
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -&gt; callee
-        | None -&gt; raise (Error "unknown function referenced")
-      in
-      let params = params callee in
-
-      (* If argument mismatch error. *)
-      if Array.length params == Array.length args then () else
-        raise (Error "incorrect # arguments passed");
-      let args = Array.map codegen_expr args in
-      build_call callee args "calltmp" builder
-</pre>
-</div>
-
-<p>Code generation for function calls is quite straightforward with LLVM.  The
-code above initially does a function name lookup in the LLVM Module's symbol
-table.  Recall that the LLVM Module is the container that holds all of the
-functions we are JIT'ing.  By giving each function the same name as what the
-user specifies, we can use the LLVM symbol table to resolve function names for
-us.</p>
-
-<p>Once we have the function to call, we recursively codegen each argument that
-is to be passed in, and create an LLVM <a href="../LangRef.html#i_call">call
-instruction</a>.  Note that LLVM uses the native C calling conventions by
-default, allowing these calls to also call into standard library functions like
-"sin" and "cos", with no additional effort.</p>
-
-<p>This wraps up our handling of the four basic expressions that we have so far
-in Kaleidoscope.  Feel free to go in and add some more.  For example, by
-browsing the <a href="../LangRef.html">LLVM language reference</a> you'll find
-several other interesting instructions that are really easy to plug into our
-basic framework.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="funcs">Function Code Generation</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Code generation for prototypes and functions must handle a number of
-details, which make their code less beautiful than expression code
-generation, but allows us to illustrate some important points.  First, lets
-talk about code generation for prototypes: they are used both for function
-bodies and external function declarations.  The code starts with:</p>
-
-<div class="doc_code">
-<pre>
-let codegen_proto = function
-  | Ast.Prototype (name, args) -&gt;
-      (* Make the function type: double(double,double) etc. *)
-      let doubles = Array.make (Array.length args) double_type in
-      let ft = function_type double_type doubles in
-      let f =
-        match lookup_function name the_module with
-</pre>
-</div>
-
-<p>This code packs a lot of power into a few lines.  Note first that this
-function returns a "Function*" instead of a "Value*" (although at the moment
-they both are modeled by <tt>llvalue</tt> in ocaml).  Because a "prototype"
-really talks about the external interface for a function (not the value computed
-by an expression), it makes sense for it to return the LLVM Function it
-corresponds to when codegen'd.</p>
-
-<p>The call to <tt>Llvm.function_type</tt> creates the <tt>Llvm.llvalue</tt>
-that should be used for a given Prototype.  Since all function arguments in
-Kaleidoscope are of type double, the first line creates a vector of "N" LLVM
-double types.  It then uses the <tt>Llvm.function_type</tt> method to create a
-function type that takes "N" doubles as arguments, returns one double as a
-result, and that is not vararg (that uses the function
-<tt>Llvm.var_arg_function_type</tt>).  Note that Types in LLVM are uniqued just
-like <tt>Constant</tt>s are, so you don't "new" a type, you "get" it.</p>
-
-<p>The final line above checks if the function has already been defined in
-<tt>Codegen.the_module</tt>. If not, we will create it.</p>
-
-<div class="doc_code">
-<pre>
-        | None -&gt; declare_function name ft the_module
-</pre>
-</div>
-
-<p>This indicates the type and name to use, as well as which module to insert
-into.  By default we assume a function has
-<tt>Llvm.Linkage.ExternalLinkage</tt>.  "<a href="LangRef.html#linkage">external
-linkage</a>" means that the function may be defined outside the current module
-and/or that it is callable by functions outside the module.  The "<tt>name</tt>"
-passed in is the name the user specified: this name is registered in
-"<tt>Codegen.the_module</tt>"s symbol table, which is used by the function call
-code above.</p>
-
-<p>In Kaleidoscope, I choose to allow redefinitions of functions in two cases:
-first, we want to allow 'extern'ing a function more than once, as long as the
-prototypes for the externs match (since all arguments have the same type, we
-just have to check that the number of arguments match).  Second, we want to
-allow 'extern'ing a function and then defining a body for it.  This is useful
-when defining mutually recursive functions.</p>
-
-<div class="doc_code">
-<pre>
-        (* If 'f' conflicted, there was already something named 'name'. If it
-         * has a body, don't allow redefinition or reextern. *)
-        | Some f -&gt;
-            (* If 'f' already has a body, reject this. *)
-            if Array.length (basic_blocks f) == 0 then () else
-              raise (Error "redefinition of function");
-
-            (* If 'f' took a different number of arguments, reject. *)
-            if Array.length (params f) == Array.length args then () else
-              raise (Error "redefinition of function with different # args");
-            f
-      in
-</pre>
-</div>
-
-<p>In order to verify the logic above, we first check to see if the pre-existing
-function is "empty".  In this case, empty means that it has no basic blocks in
-it, which means it has no body.  If it has no body, it is a forward
-declaration.  Since we don't allow anything after a full definition of the
-function, the code rejects this case.  If the previous reference to a function
-was an 'extern', we simply verify that the number of arguments for that
-definition and this one match up.  If not, we emit an error.</p>
-
-<div class="doc_code">
-<pre>
-      (* Set names for all arguments. *)
-      Array.iteri (fun i a -&gt;
-        let n = args.(i) in
-        set_value_name n a;
-        Hashtbl.add named_values n a;
-      ) (params f);
-      f
-</pre>
-</div>
-
-<p>The last bit of code for prototypes loops over all of the arguments in the
-function, setting the name of the LLVM Argument objects to match, and registering
-the arguments in the <tt>Codegen.named_values</tt> map for future use by the
-<tt>Ast.Variable</tt> variant.  Once this is set up, it returns the Function
-object to the caller.  Note that we don't check for conflicting
-argument names here (e.g. "extern foo(a b a)").  Doing so would be very
-straight-forward with the mechanics we have already used above.</p>
-
-<div class="doc_code">
-<pre>
-let codegen_func = function
-  | Ast.Function (proto, body) -&gt;
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-</pre>
-</div>
-
-<p>Code generation for function definitions starts out simply enough: we just
-codegen the prototype (Proto) and verify that it is ok.  We then clear out the
-<tt>Codegen.named_values</tt> map to make sure that there isn't anything in it
-from the last function we compiled.  Code generation of the prototype ensures
-that there is an LLVM Function object that is ready to go for us.</p>
-
-<div class="doc_code">
-<pre>
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-
-      try
-        let ret_val = codegen_expr body in
-</pre>
-</div>
-
-<p>Now we get to the point where the <tt>Codegen.builder</tt> is set up.  The
-first line creates a new
-<a href="http://en.wikipedia.org/wiki/Basic_block">basic block</a> (named
-"entry"), which is inserted into <tt>the_function</tt>.  The second line then
-tells the builder that new instructions should be inserted into the end of the
-new basic block.  Basic blocks in LLVM are an important part of functions that
-define the <a
-href="http://en.wikipedia.org/wiki/Control_flow_graph">Control Flow Graph</a>.
-Since we don't have any control flow, our functions will only contain one
-block at this point.  We'll fix this in <a href="OCamlLangImpl5.html">Chapter
-5</a> :).</p>
-
-<div class="doc_code">
-<pre>
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        the_function
-</pre>
-</div>
-
-<p>Once the insertion point is set up, we call the <tt>Codegen.codegen_func</tt>
-method for the root expression of the function.  If no error happens, this emits
-code to compute the expression into the entry block and returns the value that
-was computed.  Assuming no error, we then create an LLVM <a
-href="../LangRef.html#i_ret">ret instruction</a>, which completes the function.
-Once the function is built, we call
-<tt>Llvm_analysis.assert_valid_function</tt>, which is provided by LLVM.  This
-function does a variety of consistency checks on the generated code, to
-determine if our compiler is doing everything right.  Using this is important:
-it can catch a lot of bugs.  Once the function is finished and validated, we
-return it.</p>
-
-<div class="doc_code">
-<pre>
-      with e -&gt;
-        delete_function the_function;
-        raise e
-</pre>
-</div>
-
-<p>The only piece left here is handling of the error case.  For simplicity, we
-handle this by merely deleting the function we produced with the
-<tt>Llvm.delete_function</tt> method.  This allows the user to redefine a
-function that they incorrectly typed in before: if we didn't delete it, it
-would live in the symbol table, with a body, preventing future redefinition.</p>
-
-<p>This code does have a bug, though.  Since the <tt>Codegen.codegen_proto</tt>
-can return a previously defined forward declaration, our code can actually delete
-a forward declaration.  There are a number of ways to fix this bug, see what you
-can come up with!  Here is a testcase:</p>
-
-<div class="doc_code">
-<pre>
-extern foo(a b);     # ok, defines foo.
-def foo(a b) c;      # error, 'c' is invalid.
-def bar() foo(1, 2); # error, unknown function "foo"
-</pre>
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="driver">Driver Changes and Closing Thoughts</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-For now, code generation to LLVM doesn't really get us much, except that we can
-look at the pretty IR calls.  The sample code inserts calls to Codegen into the
-"<tt>Toplevel.main_loop</tt>", and then dumps out the LLVM IR.  This gives a
-nice way to look at the LLVM IR for simple functions.  For example:
-</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>4+5</b>;
-Read top-level expression:
-define double @""() {
-entry:
-        %addtmp = fadd double 4.000000e+00, 5.000000e+00
-        ret double %addtmp
-}
-</pre>
-</div>
-
-<p>Note how the parser turns the top-level expression into anonymous functions
-for us.  This will be handy when we add <a href="OCamlLangImpl4.html#jit">JIT
-support</a> in the next chapter.  Also note that the code is very literally
-transcribed, no optimizations are being performed.  We will
-<a href="OCamlLangImpl4.html#trivialconstfold">add optimizations</a> explicitly
-in the next chapter.</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def foo(a b) a*a + 2*a*b + b*b;</b>
-Read function definition:
-define double @foo(double %a, double %b) {
-entry:
-        %multmp = fmul double %a, %a
-        %multmp1 = fmul double 2.000000e+00, %a
-        %multmp2 = fmul double %multmp1, %b
-        %addtmp = fadd double %multmp, %multmp2
-        %multmp3 = fmul double %b, %b
-        %addtmp4 = fadd double %addtmp, %multmp3
-        ret double %addtmp4
-}
-</pre>
-</div>
-
-<p>This shows some simple arithmetic. Notice the striking similarity to the
-LLVM builder calls that we use to create the instructions.</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def bar(a) foo(a, 4.0) + bar(31337);</b>
-Read function definition:
-define double @bar(double %a) {
-entry:
-        %calltmp = call double @foo(double %a, double 4.000000e+00)
-        %calltmp1 = call double @bar(double 3.133700e+04)
-        %addtmp = fadd double %calltmp, %calltmp1
-        ret double %addtmp
-}
-</pre>
-</div>
-
-<p>This shows some function calls.  Note that this function will take a long
-time to execute if you call it.  In the future we'll add conditional control
-flow to actually make recursion useful :).</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>extern cos(x);</b>
-Read extern:
-declare double @cos(double)
-
-ready&gt; <b>cos(1.234);</b>
-Read top-level expression:
-define double @""() {
-entry:
-        %calltmp = call double @cos(double 1.234000e+00)
-        ret double %calltmp
-}
-</pre>
-</div>
-
-<p>This shows an extern for the libm "cos" function, and a call to it.</p>
-
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>^D</b>
-; ModuleID = 'my cool jit'
-
-define double @""() {
-entry:
-        %addtmp = fadd double 4.000000e+00, 5.000000e+00
-        ret double %addtmp
-}
-
-define double @foo(double %a, double %b) {
-entry:
-        %multmp = fmul double %a, %a
-        %multmp1 = fmul double 2.000000e+00, %a
-        %multmp2 = fmul double %multmp1, %b
-        %addtmp = fadd double %multmp, %multmp2
-        %multmp3 = fmul double %b, %b
-        %addtmp4 = fadd double %addtmp, %multmp3
-        ret double %addtmp4
-}
-
-define double @bar(double %a) {
-entry:
-        %calltmp = call double @foo(double %a, double 4.000000e+00)
-        %calltmp1 = call double @bar(double 3.133700e+04)
-        %addtmp = fadd double %calltmp, %calltmp1
-        ret double %addtmp
-}
-
-declare double @cos(double)
-
-define double @""() {
-entry:
-        %calltmp = call double @cos(double 1.234000e+00)
-        ret double %calltmp
-}
-</pre>
-</div>
-
-<p>When you quit the current demo, it dumps out the IR for the entire module
-generated.  Here you can see the big picture with all the functions referencing
-each other.</p>
-
-<p>This wraps up the third chapter of the Kaleidoscope tutorial.  Up next, we'll
-describe how to <a href="OCamlLangImpl4.html">add JIT codegen and optimizer
-support</a> to this so we can actually start running code!</p>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-LLVM code generator.    Because this uses the LLVM libraries, we need to link
-them in.  To do this, we use the <a
-href="http://llvm.org/cmds/llvm-config.html">llvm-config</a> tool to inform
-our makefile/command line about which options to use:</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<dl>
-<dt>_tags:</dt>
-<dd class="doc_code">
-<pre>
-&lt;{lexer,parser}.ml&gt;: use_camlp4, pp(camlp4of)
-&lt;*.{byte,native}&gt;: g++, use_llvm, use_llvm_analysis
-</pre>
-</dd>
-
-<dt>myocamlbuild.ml:</dt>
-<dd class="doc_code">
-<pre>
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
-</pre>
-</dd>
-
-<dt>token.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
-  (* commands *)
-  | Def | Extern
-
-  (* primary *)
-  | Ident of string | Number of float
-
-  (* unknown *)
-  | Kwd of char
-</pre>
-</dd>
-
-<dt>lexer.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-  (* number: [0-9.]+ *)
-  | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-  (* Comment until end of line. *)
-  | [&lt; ' ('#'); stream &gt;] -&gt;
-      lex_comment stream
-
-  (* Otherwise, just return the character as its ascii value. *)
-  | [&lt; 'c; stream &gt;] -&gt;
-      [&lt; 'Token.Kwd c; lex stream &gt;]
-
-  (* end of stream. *)
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-
-and lex_number buffer = parser
-  | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-
-and lex_ident buffer = parser
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      match Buffer.contents buffer with
-      | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
-      | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
-      | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-
-and lex_comment = parser
-  | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
-  | [&lt; 'c; e=lex_comment &gt;] -&gt; e
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</dd>
-
-<dt>ast.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a binary operator. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls. *)
-  | Call of string * expr array
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</dd>
-
-<dt>parser.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr *)
-let rec parse_primary = parser
-  (* numberexpr ::= number *)
-  | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [&lt; 'Token.Ident id; stream &gt;] -&gt;
-      let rec parse_args accumulator = parser
-        | [&lt; e=parse_expr; stream &gt;] -&gt;
-            begin parser
-              | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
-              | [&lt; &gt;] -&gt; e :: accumulator
-            end stream
-        | [&lt; &gt;] -&gt; accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [&lt; 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [&lt; &gt;] -&gt; Ast.Variable id
-      in
-      parse_ident id stream
-
-  | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec &lt; expr_prec then lhs else begin
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Parse the primary expression after the binary operator. *)
-        let rhs = parse_primary stream in
-
-        (* Okay, we know this is a binop. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) -&gt;
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec &lt; next_prec
-              then parse_bin_rhs (token_prec + 1) rhs stream
-              else rhs
-          | _ -&gt; rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-  | _ -&gt; lhs
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [&lt; lhs=parse_primary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-
-(* prototype
- *   ::= id '(' id* ')' *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
-    | [&lt; &gt;] -&gt; accumulator
-  in
-
-  parser
-  | [&lt; 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-
-  | [&lt; &gt;] -&gt;
-      raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
-  | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
-      Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
-  | [&lt; e=parse_expr &gt;] -&gt;
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-(*  external ::= 'extern' prototype *)
-let parse_extern = parser
-  | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</dd>
-
-<dt>codegen.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-let rec codegen_expr = function
-  | Ast.Number n -&gt; const_float double_type n
-  | Ast.Variable name -&gt;
-      (try Hashtbl.find named_values name with
-        | Not_found -&gt; raise (Error "unknown variable name"))
-  | Ast.Binary (op, lhs, rhs) -&gt;
-      let lhs_val = codegen_expr lhs in
-      let rhs_val = codegen_expr rhs in
-      begin
-        match op with
-        | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
-        | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
-        | '&lt;' -&gt;
-            (* Convert bool 0/1 to double 0.0 or 1.0 *)
-            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-            build_uitofp i double_type "booltmp" builder
-        | _ -&gt; raise (Error "invalid binary operator")
-      end
-  | Ast.Call (callee, args) -&gt;
-      (* Look up the name in the module table. *)
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -&gt; callee
-        | None -&gt; raise (Error "unknown function referenced")
-      in
-      let params = params callee in
-
-      (* If argument mismatch error. *)
-      if Array.length params == Array.length args then () else
-        raise (Error "incorrect # arguments passed");
-      let args = Array.map codegen_expr args in
-      build_call callee args "calltmp" builder
-
-let codegen_proto = function
-  | Ast.Prototype (name, args) -&gt;
-      (* Make the function type: double(double,double) etc. *)
-      let doubles = Array.make (Array.length args) double_type in
-      let ft = function_type double_type doubles in
-      let f =
-        match lookup_function name the_module with
-        | None -&gt; declare_function name ft the_module
-
-        (* If 'f' conflicted, there was already something named 'name'. If it
-         * has a body, don't allow redefinition or reextern. *)
-        | Some f -&gt;
-            (* If 'f' already has a body, reject this. *)
-            if block_begin f &lt;&gt; At_end f then
-              raise (Error "redefinition of function");
-
-            (* If 'f' took a different number of arguments, reject. *)
-            if element_type (type_of f) &lt;&gt; ft then
-              raise (Error "redefinition of function with different # args");
-            f
-      in
-
-      (* Set names for all arguments. *)
-      Array.iteri (fun i a -&gt;
-        let n = args.(i) in
-        set_value_name n a;
-        Hashtbl.add named_values n a;
-      ) (params f);
-      f
-
-let codegen_func = function
-  | Ast.Function (proto, body) -&gt;
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-
-      try
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        the_function
-      with e -&gt;
-        delete_function the_function;
-        raise e
-</pre>
-</dd>
-
-<dt>toplevel.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop stream =
-  match Stream.peek stream with
-  | None -&gt; ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') -&gt;
-      Stream.junk stream;
-      main_loop stream
-
-  | Some token -&gt;
-      begin
-        try match token with
-        | Token.Def -&gt;
-            let e = Parser.parse_definition stream in
-            print_endline "parsed a function definition.";
-            dump_value (Codegen.codegen_func e);
-        | Token.Extern -&gt;
-            let e = Parser.parse_extern stream in
-            print_endline "parsed an extern.";
-            dump_value (Codegen.codegen_proto e);
-        | _ -&gt;
-            (* Evaluate a top-level expression into an anonymous function. *)
-            let e = Parser.parse_toplevel stream in
-            print_endline "parsed a top-level expr";
-            dump_value (Codegen.codegen_func e);
-        with Stream.Error s | Codegen.Error s -&gt;
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready&gt; "; flush stdout;
-      main_loop stream
-</pre>
-</dd>
-
-<dt>toy.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-let main () =
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '&lt;' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-
-  (* Prime the first token. *)
-  print_string "ready&gt; "; flush stdout;
-  let stream = Lexer.lex (Stream.of_channel stdin) in
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop stream;
-
-  (* Print out all the generated code. *)
-  dump_module Codegen.the_module
-;;
-
-main ()
-</pre>
-</dd>
-</dl>
-
-<a href="OCamlLangImpl4.html">Next: Adding JIT and Optimizer Support</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl3.rst b/docs/tutorial/OCamlLangImpl3.rst
new file mode 100644
index 0000000000..d2a47b486c
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl3.rst
@@ -0,0 +1,964 @@
+========================================
+Kaleidoscope: Code generation to LLVM IR
+========================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_ and `Erick
+Tryzelaar <mailto:idadesub@users.sourceforge.net>`_
+
+Chapter 3 Introduction
+======================
+
+Welcome to Chapter 3 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. This chapter shows you how to transform
+the `Abstract Syntax Tree <OCamlLangImpl2.html>`_, built in Chapter 2,
+into LLVM IR. This will teach you a little bit about how LLVM does
+things, as well as demonstrate how easy it is to use. It's much more
+work to build a lexer and parser than it is to generate LLVM IR code. :)
+
+**Please note**: the code in this chapter and later require LLVM 2.3 or
+LLVM SVN to work. LLVM 2.2 and before will not work with it.
+
+Code Generation Setup
+=====================
+
+In order to generate LLVM IR, we want some simple setup to get started.
+First we define virtual code generation (codegen) methods in each AST
+class:
+
+.. code-block:: ocaml
+
+    let rec codegen_expr = function
+      | Ast.Number n -> ...
+      | Ast.Variable name -> ...
+
+The ``Codegen.codegen_expr`` function says to emit IR for that AST node
+along with all the things it depends on, and they all return an LLVM
+Value object. "Value" is the class used to represent a "`Static Single
+Assignment
+(SSA) <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_
+register" or "SSA value" in LLVM. The most distinct aspect of SSA values
+is that their value is computed as the related instruction executes, and
+it does not get a new value until (and if) the instruction re-executes.
+In other words, there is no way to "change" an SSA value. For more
+information, please read up on `Static Single
+Assignment <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_
+- the concepts are really quite natural once you grok them.
+
+The second thing we want is an "Error" exception like we used for the
+parser, which will be used to report errors found during code generation
+(for example, use of an undeclared parameter):
+
+.. code-block:: ocaml
+
+    exception Error of string
+
+    let context = global_context ()
+    let the_module = create_module context "my cool jit"
+    let builder = builder context
+    let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+    let double_type = double_type context
+
+The static variables will be used during code generation.
+``Codgen.the_module`` is the LLVM construct that contains all of the
+functions and global variables in a chunk of code. In many ways, it is
+the top-level structure that the LLVM IR uses to contain code.
+
+The ``Codegen.builder`` object is a helper object that makes it easy to
+generate LLVM instructions. Instances of the
+```IRBuilder`` <http://llvm.org/doxygen/IRBuilder_8h-source.html>`_
+class keep track of the current place to insert instructions and has
+methods to create new instructions.
+
+The ``Codegen.named_values`` map keeps track of which values are defined
+in the current scope and what their LLVM representation is. (In other
+words, it is a symbol table for the code). In this form of Kaleidoscope,
+the only things that can be referenced are function parameters. As such,
+function parameters will be in this map when generating code for their
+function body.
+
+With these basics in place, we can start talking about how to generate
+code for each expression. Note that this assumes that the
+``Codgen.builder`` has been set up to generate code *into* something.
+For now, we'll assume that this has already been done, and we'll just
+use it to emit code.
+
+Expression Code Generation
+==========================
+
+Generating LLVM code for expression nodes is very straightforward: less
+than 30 lines of commented code for all four of our expression nodes.
+First we'll do numeric literals:
+
+.. code-block:: ocaml
+
+      | Ast.Number n -> const_float double_type n
+
+In the LLVM IR, numeric constants are represented with the
+``ConstantFP`` class, which holds the numeric value in an ``APFloat``
+internally (``APFloat`` has the capability of holding floating point
+constants of Arbitrary Precision). This code basically just creates
+and returns a ``ConstantFP``. Note that in the LLVM IR that constants
+are all uniqued together and shared. For this reason, the API uses "the
+foo::get(..)" idiom instead of "new foo(..)" or "foo::Create(..)".
+
+.. code-block:: ocaml
+
+      | Ast.Variable name ->
+          (try Hashtbl.find named_values name with
+            | Not_found -> raise (Error "unknown variable name"))
+
+References to variables are also quite simple using LLVM. In the simple
+version of Kaleidoscope, we assume that the variable has already been
+emitted somewhere and its value is available. In practice, the only
+values that can be in the ``Codegen.named_values`` map are function
+arguments. This code simply checks to see that the specified name is in
+the map (if not, an unknown variable is being referenced) and returns
+the value for it. In future chapters, we'll add support for `loop
+induction variables <LangImpl5.html#for>`_ in the symbol table, and for
+`local variables <LangImpl7.html#localvars>`_.
+
+.. code-block:: ocaml
+
+      | Ast.Binary (op, lhs, rhs) ->
+          let lhs_val = codegen_expr lhs in
+          let rhs_val = codegen_expr rhs in
+          begin
+            match op with
+            | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+            | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+            | '*' -> build_fmul lhs_val rhs_val "multmp" builder
+            | '<' ->
+                (* Convert bool 0/1 to double 0.0 or 1.0 *)
+                let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+                build_uitofp i double_type "booltmp" builder
+            | _ -> raise (Error "invalid binary operator")
+          end
+
+Binary operators start to get more interesting. The basic idea here is
+that we recursively emit code for the left-hand side of the expression,
+then the right-hand side, then we compute the result of the binary
+expression. In this code, we do a simple switch on the opcode to create
+the right LLVM instruction.
+
+In the example above, the LLVM builder class is starting to show its
+value. IRBuilder knows where to insert the newly created instruction,
+all you have to do is specify what instruction to create (e.g. with
+``Llvm.create_add``), which operands to use (``lhs`` and ``rhs`` here)
+and optionally provide a name for the generated instruction.
+
+One nice thing about LLVM is that the name is just a hint. For instance,
+if the code above emits multiple "addtmp" variables, LLVM will
+automatically provide each one with an increasing, unique numeric
+suffix. Local value names for instructions are purely optional, but it
+makes it much easier to read the IR dumps.
+
+`LLVM instructions <../LangRef.html#instref>`_ are constrained by strict
+rules: for example, the Left and Right operators of an `add
+instruction <../LangRef.html#i_add>`_ must have the same type, and the
+result type of the add must match the operand types. Because all values
+in Kaleidoscope are doubles, this makes for very simple code for add,
+sub and mul.
+
+On the other hand, LLVM specifies that the `fcmp
+instruction <../LangRef.html#i_fcmp>`_ always returns an 'i1' value (a
+one bit integer). The problem with this is that Kaleidoscope wants the
+value to be a 0.0 or 1.0 value. In order to get these semantics, we
+combine the fcmp instruction with a `uitofp
+instruction <../LangRef.html#i_uitofp>`_. This instruction converts its
+input integer into a floating point value by treating the input as an
+unsigned value. In contrast, if we used the `sitofp
+instruction <../LangRef.html#i_sitofp>`_, the Kaleidoscope '<' operator
+would return 0.0 and -1.0, depending on the input value.
+
+.. code-block:: ocaml
+
+      | Ast.Call (callee, args) ->
+          (* Look up the name in the module table. *)
+          let callee =
+            match lookup_function callee the_module with
+            | Some callee -> callee
+            | None -> raise (Error "unknown function referenced")
+          in
+          let params = params callee in
+
+          (* If argument mismatch error. *)
+          if Array.length params == Array.length args then () else
+            raise (Error "incorrect # arguments passed");
+          let args = Array.map codegen_expr args in
+          build_call callee args "calltmp" builder
+
+Code generation for function calls is quite straightforward with LLVM.
+The code above initially does a function name lookup in the LLVM
+Module's symbol table. Recall that the LLVM Module is the container that
+holds all of the functions we are JIT'ing. By giving each function the
+same name as what the user specifies, we can use the LLVM symbol table
+to resolve function names for us.
+
+Once we have the function to call, we recursively codegen each argument
+that is to be passed in, and create an LLVM `call
+instruction <../LangRef.html#i_call>`_. Note that LLVM uses the native C
+calling conventions by default, allowing these calls to also call into
+standard library functions like "sin" and "cos", with no additional
+effort.
+
+This wraps up our handling of the four basic expressions that we have so
+far in Kaleidoscope. Feel free to go in and add some more. For example,
+by browsing the `LLVM language reference <../LangRef.html>`_ you'll find
+several other interesting instructions that are really easy to plug into
+our basic framework.
+
+Function Code Generation
+========================
+
+Code generation for prototypes and functions must handle a number of
+details, which make their code less beautiful than expression code
+generation, but allows us to illustrate some important points. First,
+lets talk about code generation for prototypes: they are used both for
+function bodies and external function declarations. The code starts
+with:
+
+.. code-block:: ocaml
+
+    let codegen_proto = function
+      | Ast.Prototype (name, args) ->
+          (* Make the function type: double(double,double) etc. *)
+          let doubles = Array.make (Array.length args) double_type in
+          let ft = function_type double_type doubles in
+          let f =
+            match lookup_function name the_module with
+
+This code packs a lot of power into a few lines. Note first that this
+function returns a "Function\*" instead of a "Value\*" (although at the
+moment they both are modeled by ``llvalue`` in ocaml). Because a
+"prototype" really talks about the external interface for a function
+(not the value computed by an expression), it makes sense for it to
+return the LLVM Function it corresponds to when codegen'd.
+
+The call to ``Llvm.function_type`` creates the ``Llvm.llvalue`` that
+should be used for a given Prototype. Since all function arguments in
+Kaleidoscope are of type double, the first line creates a vector of "N"
+LLVM double types. It then uses the ``Llvm.function_type`` method to
+create a function type that takes "N" doubles as arguments, returns one
+double as a result, and that is not vararg (that uses the function
+``Llvm.var_arg_function_type``). Note that Types in LLVM are uniqued
+just like ``Constant``'s are, so you don't "new" a type, you "get" it.
+
+The final line above checks if the function has already been defined in
+``Codegen.the_module``. If not, we will create it.
+
+.. code-block:: ocaml
+
+            | None -> declare_function name ft the_module
+
+This indicates the type and name to use, as well as which module to
+insert into. By default we assume a function has
+``Llvm.Linkage.ExternalLinkage``. "`external
+linkage <LangRef.html#linkage>`_" means that the function may be defined
+outside the current module and/or that it is callable by functions
+outside the module. The "``name``" passed in is the name the user
+specified: this name is registered in "``Codegen.the_module``"s symbol
+table, which is used by the function call code above.
+
+In Kaleidoscope, I choose to allow redefinitions of functions in two
+cases: first, we want to allow 'extern'ing a function more than once, as
+long as the prototypes for the externs match (since all arguments have
+the same type, we just have to check that the number of arguments
+match). Second, we want to allow 'extern'ing a function and then
+defining a body for it. This is useful when defining mutually recursive
+functions.
+
+.. code-block:: ocaml
+
+            (* If 'f' conflicted, there was already something named 'name'. If it
+             * has a body, don't allow redefinition or reextern. *)
+            | Some f ->
+                (* If 'f' already has a body, reject this. *)
+                if Array.length (basic_blocks f) == 0 then () else
+                  raise (Error "redefinition of function");
+
+                (* If 'f' took a different number of arguments, reject. *)
+                if Array.length (params f) == Array.length args then () else
+                  raise (Error "redefinition of function with different # args");
+                f
+          in
+
+In order to verify the logic above, we first check to see if the
+pre-existing function is "empty". In this case, empty means that it has
+no basic blocks in it, which means it has no body. If it has no body, it
+is a forward declaration. Since we don't allow anything after a full
+definition of the function, the code rejects this case. If the previous
+reference to a function was an 'extern', we simply verify that the
+number of arguments for that definition and this one match up. If not,
+we emit an error.
+
+.. code-block:: ocaml
+
+          (* Set names for all arguments. *)
+          Array.iteri (fun i a ->
+            let n = args.(i) in
+            set_value_name n a;
+            Hashtbl.add named_values n a;
+          ) (params f);
+          f
+
+The last bit of code for prototypes loops over all of the arguments in
+the function, setting the name of the LLVM Argument objects to match,
+and registering the arguments in the ``Codegen.named_values`` map for
+future use by the ``Ast.Variable`` variant. Once this is set up, it
+returns the Function object to the caller. Note that we don't check for
+conflicting argument names here (e.g. "extern foo(a b a)"). Doing so
+would be very straight-forward with the mechanics we have already used
+above.
+
+.. code-block:: ocaml
+
+    let codegen_func = function
+      | Ast.Function (proto, body) ->
+          Hashtbl.clear named_values;
+          let the_function = codegen_proto proto in
+
+Code generation for function definitions starts out simply enough: we
+just codegen the prototype (Proto) and verify that it is ok. We then
+clear out the ``Codegen.named_values`` map to make sure that there isn't
+anything in it from the last function we compiled. Code generation of
+the prototype ensures that there is an LLVM Function object that is
+ready to go for us.
+
+.. code-block:: ocaml
+
+          (* Create a new basic block to start insertion into. *)
+          let bb = append_block context "entry" the_function in
+          position_at_end bb builder;
+
+          try
+            let ret_val = codegen_expr body in
+
+Now we get to the point where the ``Codegen.builder`` is set up. The
+first line creates a new `basic
+block <http://en.wikipedia.org/wiki/Basic_block>`_ (named "entry"),
+which is inserted into ``the_function``. The second line then tells the
+builder that new instructions should be inserted into the end of the new
+basic block. Basic blocks in LLVM are an important part of functions
+that define the `Control Flow
+Graph <http://en.wikipedia.org/wiki/Control_flow_graph>`_. Since we
+don't have any control flow, our functions will only contain one block
+at this point. We'll fix this in `Chapter 5 <OCamlLangImpl5.html>`_ :).
+
+.. code-block:: ocaml
+
+            let ret_val = codegen_expr body in
+
+            (* Finish off the function. *)
+            let _ = build_ret ret_val builder in
+
+            (* Validate the generated code, checking for consistency. *)
+            Llvm_analysis.assert_valid_function the_function;
+
+            the_function
+
+Once the insertion point is set up, we call the ``Codegen.codegen_func``
+method for the root expression of the function. If no error happens,
+this emits code to compute the expression into the entry block and
+returns the value that was computed. Assuming no error, we then create
+an LLVM `ret instruction <../LangRef.html#i_ret>`_, which completes the
+function. Once the function is built, we call
+``Llvm_analysis.assert_valid_function``, which is provided by LLVM. This
+function does a variety of consistency checks on the generated code, to
+determine if our compiler is doing everything right. Using this is
+important: it can catch a lot of bugs. Once the function is finished and
+validated, we return it.
+
+.. code-block:: ocaml
+
+          with e ->
+            delete_function the_function;
+            raise e
+
+The only piece left here is handling of the error case. For simplicity,
+we handle this by merely deleting the function we produced with the
+``Llvm.delete_function`` method. This allows the user to redefine a
+function that they incorrectly typed in before: if we didn't delete it,
+it would live in the symbol table, with a body, preventing future
+redefinition.
+
+This code does have a bug, though. Since the ``Codegen.codegen_proto``
+can return a previously defined forward declaration, our code can
+actually delete a forward declaration. There are a number of ways to fix
+this bug, see what you can come up with! Here is a testcase:
+
+::
+
+    extern foo(a b);     # ok, defines foo.
+    def foo(a b) c;      # error, 'c' is invalid.
+    def bar() foo(1, 2); # error, unknown function "foo"
+
+Driver Changes and Closing Thoughts
+===================================
+
+For now, code generation to LLVM doesn't really get us much, except that
+we can look at the pretty IR calls. The sample code inserts calls to
+Codegen into the "``Toplevel.main_loop``", and then dumps out the LLVM
+IR. This gives a nice way to look at the LLVM IR for simple functions.
+For example:
+
+::
+
+    ready> 4+5;
+    Read top-level expression:
+    define double @""() {
+    entry:
+            %addtmp = fadd double 4.000000e+00, 5.000000e+00
+            ret double %addtmp
+    }
+
+Note how the parser turns the top-level expression into anonymous
+functions for us. This will be handy when we add `JIT
+support <OCamlLangImpl4.html#jit>`_ in the next chapter. Also note that
+the code is very literally transcribed, no optimizations are being
+performed. We will `add
+optimizations <OCamlLangImpl4.html#trivialconstfold>`_ explicitly in the
+next chapter.
+
+::
+
+    ready> def foo(a b) a*a + 2*a*b + b*b;
+    Read function definition:
+    define double @foo(double %a, double %b) {
+    entry:
+            %multmp = fmul double %a, %a
+            %multmp1 = fmul double 2.000000e+00, %a
+            %multmp2 = fmul double %multmp1, %b
+            %addtmp = fadd double %multmp, %multmp2
+            %multmp3 = fmul double %b, %b
+            %addtmp4 = fadd double %addtmp, %multmp3
+            ret double %addtmp4
+    }
+
+This shows some simple arithmetic. Notice the striking similarity to the
+LLVM builder calls that we use to create the instructions.
+
+::
+
+    ready> def bar(a) foo(a, 4.0) + bar(31337);
+    Read function definition:
+    define double @bar(double %a) {
+    entry:
+            %calltmp = call double @foo(double %a, double 4.000000e+00)
+            %calltmp1 = call double @bar(double 3.133700e+04)
+            %addtmp = fadd double %calltmp, %calltmp1
+            ret double %addtmp
+    }
+
+This shows some function calls. Note that this function will take a long
+time to execute if you call it. In the future we'll add conditional
+control flow to actually make recursion useful :).
+
+::
+
+    ready> extern cos(x);
+    Read extern:
+    declare double @cos(double)
+
+    ready> cos(1.234);
+    Read top-level expression:
+    define double @""() {
+    entry:
+            %calltmp = call double @cos(double 1.234000e+00)
+            ret double %calltmp
+    }
+
+This shows an extern for the libm "cos" function, and a call to it.
+
+::
+
+    ready> ^D
+    ; ModuleID = 'my cool jit'
+
+    define double @""() {
+    entry:
+            %addtmp = fadd double 4.000000e+00, 5.000000e+00
+            ret double %addtmp
+    }
+
+    define double @foo(double %a, double %b) {
+    entry:
+            %multmp = fmul double %a, %a
+            %multmp1 = fmul double 2.000000e+00, %a
+            %multmp2 = fmul double %multmp1, %b
+            %addtmp = fadd double %multmp, %multmp2
+            %multmp3 = fmul double %b, %b
+            %addtmp4 = fadd double %addtmp, %multmp3
+            ret double %addtmp4
+    }
+
+    define double @bar(double %a) {
+    entry:
+            %calltmp = call double @foo(double %a, double 4.000000e+00)
+            %calltmp1 = call double @bar(double 3.133700e+04)
+            %addtmp = fadd double %calltmp, %calltmp1
+            ret double %addtmp
+    }
+
+    declare double @cos(double)
+
+    define double @""() {
+    entry:
+            %calltmp = call double @cos(double 1.234000e+00)
+            ret double %calltmp
+    }
+
+When you quit the current demo, it dumps out the IR for the entire
+module generated. Here you can see the big picture with all the
+functions referencing each other.
+
+This wraps up the third chapter of the Kaleidoscope tutorial. Up next,
+we'll describe how to `add JIT codegen and optimizer
+support <OCamlLangImpl4.html>`_ to this so we can actually start running
+code!
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the LLVM code generator. Because this uses the LLVM libraries, we need
+to link them in. To do this, we use the
+`llvm-config <http://llvm.org/cmds/llvm-config.html>`_ tool to inform
+our makefile/command line about which options to use:
+
+.. code-block:: bash
+
+    # Compile
+    ocamlbuild toy.byte
+    # Run
+    ./toy.byte
+
+Here is the code:
+
+\_tags:
+    ::
+
+        <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+        <*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+
+myocamlbuild.ml:
+    .. code-block:: ocaml
+
+        open Ocamlbuild_plugin;;
+
+        ocaml_lib ~extern:true "llvm";;
+        ocaml_lib ~extern:true "llvm_analysis";;
+
+        flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
+
+token.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Lexer Tokens
+         *===----------------------------------------------------------------------===*)
+
+        (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
+         * these others for known things. *)
+        type token =
+          (* commands *)
+          | Def | Extern
+
+          (* primary *)
+          | Ident of string | Number of float
+
+          (* unknown *)
+          | Kwd of char
+
+lexer.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Lexer
+         *===----------------------------------------------------------------------===*)
+
+        let rec lex = parser
+          (* Skip any whitespace. *)
+          | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+          (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+          | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+              let buffer = Buffer.create 1 in
+              Buffer.add_char buffer c;
+              lex_ident buffer stream
+
+          (* number: [0-9.]+ *)
+          | [< ' ('0' .. '9' as c); stream >] ->
+              let buffer = Buffer.create 1 in
+              Buffer.add_char buffer c;
+              lex_number buffer stream
+
+          (* Comment until end of line. *)
+          | [< ' ('#'); stream >] ->
+              lex_comment stream
+
+          (* Otherwise, just return the character as its ascii value. *)
+          | [< 'c; stream >] ->
+              [< 'Token.Kwd c; lex stream >]
+
+          (* end of stream. *)
+          | [< >] -> [< >]
+
+        and lex_number buffer = parser
+          | [< ' ('0' .. '9' | '.' as c); stream >] ->
+              Buffer.add_char buffer c;
+              lex_number buffer stream
+          | [< stream=lex >] ->
+              [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+        and lex_ident buffer = parser
+          | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+              Buffer.add_char buffer c;
+              lex_ident buffer stream
+          | [< stream=lex >] ->
+              match Buffer.contents buffer with
+              | "def" -> [< 'Token.Def; stream >]
+              | "extern" -> [< 'Token.Extern; stream >]
+              | id -> [< 'Token.Ident id; stream >]
+
+        and lex_comment = parser
+          | [< ' ('\n'); stream=lex >] -> stream
+          | [< 'c; e=lex_comment >] -> e
+          | [< >] -> [< >]
+
+ast.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Abstract Syntax Tree (aka Parse Tree)
+         *===----------------------------------------------------------------------===*)
+
+        (* expr - Base type for all expression nodes. *)
+        type expr =
+          (* variant for numeric literals like "1.0". *)
+          | Number of float
+
+          (* variant for referencing a variable, like "a". *)
+          | Variable of string
+
+          (* variant for a binary operator. *)
+          | Binary of char * expr * expr
+
+          (* variant for function calls. *)
+          | Call of string * expr array
+
+        (* proto - This type represents the "prototype" for a function, which captures
+         * its name, and its argument names (thus implicitly the number of arguments the
+         * function takes). *)
+        type proto = Prototype of string * string array
+
+        (* func - This type represents a function definition itself. *)
+        type func = Function of proto * expr
+
+parser.ml:
+    .. code-block:: ocaml
+
+        (*===---------------------------------------------------------------------===
+         * Parser
+         *===---------------------------------------------------------------------===*)
+
+        (* binop_precedence - This holds the precedence for each binary operator that is
+         * defined *)
+        let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+        (* precedence - Get the precedence of the pending binary operator token. *)
+        let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+        (* primary
+         *   ::= identifier
+         *   ::= numberexpr
+         *   ::= parenexpr *)
+        let rec parse_primary = parser
+          (* numberexpr ::= number *)
+          | [< 'Token.Number n >] -> Ast.Number n
+
+          (* parenexpr ::= '(' expression ')' *)
+          | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+          (* identifierexpr
+           *   ::= identifier
+           *   ::= identifier '(' argumentexpr ')' *)
+          | [< 'Token.Ident id; stream >] ->
+              let rec parse_args accumulator = parser
+                | [< e=parse_expr; stream >] ->
+                    begin parser
+                      | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+                      | [< >] -> e :: accumulator
+                    end stream
+                | [< >] -> accumulator
+              in
+              let rec parse_ident id = parser
+                (* Call. *)
+                | [< 'Token.Kwd '(';
+                     args=parse_args [];
+                     'Token.Kwd ')' ?? "expected ')'">] ->
+                    Ast.Call (id, Array.of_list (List.rev args))
+
+                (* Simple variable ref. *)
+                | [< >] -> Ast.Variable id
+              in
+              parse_ident id stream
+
+          | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+        (* binoprhs
+         *   ::= ('+' primary)* *)
+        and parse_bin_rhs expr_prec lhs stream =
+          match Stream.peek stream with
+          (* If this is a binop, find its precedence. *)
+          | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+              let token_prec = precedence c in
+
+              (* If this is a binop that binds at least as tightly as the current binop,
+               * consume it, otherwise we are done. *)
+              if token_prec < expr_prec then lhs else begin
+                (* Eat the binop. *)
+                Stream.junk stream;
+
+                (* Parse the primary expression after the binary operator. *)
+                let rhs = parse_primary stream in
+
+                (* Okay, we know this is a binop. *)
+                let rhs =
+                  match Stream.peek stream with
+                  | Some (Token.Kwd c2) ->
+                      (* If BinOp binds less tightly with rhs than the operator after
+                       * rhs, let the pending operator take rhs as its lhs. *)
+                      let next_prec = precedence c2 in
+                      if token_prec < next_prec
+                      then parse_bin_rhs (token_prec + 1) rhs stream
+                      else rhs
+                  | _ -> rhs
+                in
+
+                (* Merge lhs/rhs. *)
+                let lhs = Ast.Binary (c, lhs, rhs) in
+                parse_bin_rhs expr_prec lhs stream
+              end
+          | _ -> lhs
+
+        (* expression
+         *   ::= primary binoprhs *)
+        and parse_expr = parser
+          | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+        (* prototype
+         *   ::= id '(' id* ')' *)
+        let parse_prototype =
+          let rec parse_args accumulator = parser
+            | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+            | [< >] -> accumulator
+          in
+
+          parser
+          | [< 'Token.Ident id;
+               'Token.Kwd '(' ?? "expected '(' in prototype";
+               args=parse_args [];
+               'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+              (* success. *)
+              Ast.Prototype (id, Array.of_list (List.rev args))
+
+          | [< >] ->
+              raise (Stream.Error "expected function name in prototype")
+
+        (* definition ::= 'def' prototype expression *)
+        let parse_definition = parser
+          | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+              Ast.Function (p, e)
+
+        (* toplevelexpr ::= expression *)
+        let parse_toplevel = parser
+          | [< e=parse_expr >] ->
+              (* Make an anonymous proto. *)
+              Ast.Function (Ast.Prototype ("", [||]), e)
+
+        (*  external ::= 'extern' prototype *)
+        let parse_extern = parser
+          | [< 'Token.Extern; e=parse_prototype >] -> e
+
+codegen.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Code Generation
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+
+        exception Error of string
+
+        let context = global_context ()
+        let the_module = create_module context "my cool jit"
+        let builder = builder context
+        let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+        let double_type = double_type context
+
+        let rec codegen_expr = function
+          | Ast.Number n -> const_float double_type n
+          | Ast.Variable name ->
+              (try Hashtbl.find named_values name with
+                | Not_found -> raise (Error "unknown variable name"))
+          | Ast.Binary (op, lhs, rhs) ->
+              let lhs_val = codegen_expr lhs in
+              let rhs_val = codegen_expr rhs in
+              begin
+                match op with
+                | '+' -> build_add lhs_val rhs_val "addtmp" builder
+                | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+                | '*' -> build_mul lhs_val rhs_val "multmp" builder
+                | '<' ->
+                    (* Convert bool 0/1 to double 0.0 or 1.0 *)
+                    let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+                    build_uitofp i double_type "booltmp" builder
+                | _ -> raise (Error "invalid binary operator")
+              end
+          | Ast.Call (callee, args) ->
+              (* Look up the name in the module table. *)
+              let callee =
+                match lookup_function callee the_module with
+                | Some callee -> callee
+                | None -> raise (Error "unknown function referenced")
+              in
+              let params = params callee in
+
+              (* If argument mismatch error. *)
+              if Array.length params == Array.length args then () else
+                raise (Error "incorrect # arguments passed");
+              let args = Array.map codegen_expr args in
+              build_call callee args "calltmp" builder
+
+        let codegen_proto = function
+          | Ast.Prototype (name, args) ->
+              (* Make the function type: double(double,double) etc. *)
+              let doubles = Array.make (Array.length args) double_type in
+              let ft = function_type double_type doubles in
+              let f =
+                match lookup_function name the_module with
+                | None -> declare_function name ft the_module
+
+                (* If 'f' conflicted, there was already something named 'name'. If it
+                 * has a body, don't allow redefinition or reextern. *)
+                | Some f ->
+                    (* If 'f' already has a body, reject this. *)
+                    if block_begin f <> At_end f then
+                      raise (Error "redefinition of function");
+
+                    (* If 'f' took a different number of arguments, reject. *)
+                    if element_type (type_of f) <> ft then
+                      raise (Error "redefinition of function with different # args");
+                    f
+              in
+
+              (* Set names for all arguments. *)
+              Array.iteri (fun i a ->
+                let n = args.(i) in
+                set_value_name n a;
+                Hashtbl.add named_values n a;
+              ) (params f);
+              f
+
+        let codegen_func = function
+          | Ast.Function (proto, body) ->
+              Hashtbl.clear named_values;
+              let the_function = codegen_proto proto in
+
+              (* Create a new basic block to start insertion into. *)
+              let bb = append_block context "entry" the_function in
+              position_at_end bb builder;
+
+              try
+                let ret_val = codegen_expr body in
+
+                (* Finish off the function. *)
+                let _ = build_ret ret_val builder in
+
+                (* Validate the generated code, checking for consistency. *)
+                Llvm_analysis.assert_valid_function the_function;
+
+                the_function
+              with e ->
+                delete_function the_function;
+                raise e
+
+toplevel.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Top-Level parsing and JIT Driver
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+
+        (* top ::= definition | external | expression | ';' *)
+        let rec main_loop stream =
+          match Stream.peek stream with
+          | None -> ()
+
+          (* ignore top-level semicolons. *)
+          | Some (Token.Kwd ';') ->
+              Stream.junk stream;
+              main_loop stream
+
+          | Some token ->
+              begin
+                try match token with
+                | Token.Def ->
+                    let e = Parser.parse_definition stream in
+                    print_endline "parsed a function definition.";
+                    dump_value (Codegen.codegen_func e);
+                | Token.Extern ->
+                    let e = Parser.parse_extern stream in
+                    print_endline "parsed an extern.";
+                    dump_value (Codegen.codegen_proto e);
+                | _ ->
+                    (* Evaluate a top-level expression into an anonymous function. *)
+                    let e = Parser.parse_toplevel stream in
+                    print_endline "parsed a top-level expr";
+                    dump_value (Codegen.codegen_func e);
+                with Stream.Error s | Codegen.Error s ->
+                  (* Skip token for error recovery. *)
+                  Stream.junk stream;
+                  print_endline s;
+              end;
+              print_string "ready> "; flush stdout;
+              main_loop stream
+
+toy.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Main driver code.
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+
+        let main () =
+          (* Install standard binary operators.
+           * 1 is the lowest precedence. *)
+          Hashtbl.add Parser.binop_precedence '<' 10;
+          Hashtbl.add Parser.binop_precedence '+' 20;
+          Hashtbl.add Parser.binop_precedence '-' 20;
+          Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+
+          (* Prime the first token. *)
+          print_string "ready> "; flush stdout;
+          let stream = Lexer.lex (Stream.of_channel stdin) in
+
+          (* Run the main "interpreter loop" now. *)
+          Toplevel.main_loop stream;
+
+          (* Print out all the generated code. *)
+          dump_module Codegen.the_module
+        ;;
+
+        main ()
+
+`Next: Adding JIT and Optimizer Support <OCamlLangImpl4.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl4.html b/docs/tutorial/OCamlLangImpl4.html
deleted file mode 100644
index eb97d986c2..0000000000
--- a/docs/tutorial/OCamlLangImpl4.html
+++ /dev/null
@@ -1,1026 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Adding JIT and Optimizer Support</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <meta name="author" content="Erick Tryzelaar">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Adding JIT and Optimizer Support</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 4
-  <ol>
-    <li><a href="#intro">Chapter 4 Introduction</a></li>
-    <li><a href="#trivialconstfold">Trivial Constant Folding</a></li>
-    <li><a href="#optimizerpasses">LLVM Optimization Passes</a></li>
-    <li><a href="#jit">Adding a JIT Compiler</a></li>
-    <li><a href="#code">Full Code Listing</a></li>
-  </ol>
-</li>
-<li><a href="OCamlLangImpl5.html">Chapter 5</a>: Extending the Language: Control
-Flow</li>
-</ul>
-
-<div class="doc_author">
-	<p>
-		Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
-		and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
-	</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 4 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 4 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial.  Chapters 1-3 described the implementation of a simple
-language and added support for generating LLVM IR.  This chapter describes
-two new techniques: adding optimizer support to your language, and adding JIT
-compiler support.  These additions will demonstrate how to get nice, efficient code
-for the Kaleidoscope language.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="trivialconstfold">Trivial Constant Folding</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p><b>Note:</b> the default <tt>IRBuilder</tt> now always includes the constant 
-folding optimisations below.<p>
-
-<p>
-Our demonstration for Chapter 3 is elegant and easy to extend.  Unfortunately,
-it does not produce wonderful code.  For example, when compiling simple code,
-we don't get obvious optimizations:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) 1+2+x;</b>
-Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double 1.000000e+00, 2.000000e+00
-        %addtmp1 = fadd double %addtmp, %x
-        ret double %addtmp1
-}
-</pre>
-</div>
-
-<p>This code is a very, very literal transcription of the AST built by parsing
-the input. As such, this transcription lacks optimizations like constant folding
-(we'd like to get "<tt>add x, 3.0</tt>" in the example above) as well as other
-more important optimizations.  Constant folding, in particular, is a very common
-and very important optimization: so much so that many language implementors
-implement constant folding support in their AST representation.</p>
-
-<p>With LLVM, you don't need this support in the AST.  Since all calls to build
-LLVM IR go through the LLVM builder, it would be nice if the builder itself
-checked to see if there was a constant folding opportunity when you call it.
-If so, it could just do the constant fold and return the constant instead of
-creating an instruction.  This is exactly what the <tt>LLVMFoldingBuilder</tt>
-class does.
-
-<p>All we did was switch from <tt>LLVMBuilder</tt> to
-<tt>LLVMFoldingBuilder</tt>.  Though we change no other code, we now have all of our
-instructions implicitly constant folded without us having to do anything
-about it.  For example, the input above now compiles to:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) 1+2+x;</b>
-Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double 3.000000e+00, %x
-        ret double %addtmp
-}
-</pre>
-</div>
-
-<p>Well, that was easy :).  In practice, we recommend always using
-<tt>LLVMFoldingBuilder</tt> when generating code like this.  It has no
-"syntactic overhead" for its use (you don't have to uglify your compiler with
-constant checks everywhere) and it can dramatically reduce the amount of
-LLVM IR that is generated in some cases (particular for languages with a macro
-preprocessor or that use a lot of constants).</p>
-
-<p>On the other hand, the <tt>LLVMFoldingBuilder</tt> is limited by the fact
-that it does all of its analysis inline with the code as it is built.  If you
-take a slightly more complex example:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) (1+2+x)*(x+(1+2));</b>
-ready&gt; Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double 3.000000e+00, %x
-        %addtmp1 = fadd double %x, 3.000000e+00
-        %multmp = fmul double %addtmp, %addtmp1
-        ret double %multmp
-}
-</pre>
-</div>
-
-<p>In this case, the LHS and RHS of the multiplication are the same value.  We'd
-really like to see this generate "<tt>tmp = x+3; result = tmp*tmp;</tt>" instead
-of computing "<tt>x*3</tt>" twice.</p>
-
-<p>Unfortunately, no amount of local analysis will be able to detect and correct
-this.  This requires two transformations: reassociation of expressions (to
-make the add's lexically identical) and Common Subexpression Elimination (CSE)
-to  delete the redundant add instruction.  Fortunately, LLVM provides a broad
-range of optimizations that you can use, in the form of "passes".</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="optimizerpasses">LLVM Optimization Passes</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>LLVM provides many optimization passes, which do many different sorts of
-things and have different tradeoffs.  Unlike other systems, LLVM doesn't hold
-to the mistaken notion that one set of optimizations is right for all languages
-and for all situations.  LLVM allows a compiler implementor to make complete
-decisions about what optimizations to use, in which order, and in what
-situation.</p>
-
-<p>As a concrete example, LLVM supports both "whole module" passes, which look
-across as large of body of code as they can (often a whole file, but if run
-at link time, this can be a substantial portion of the whole program).  It also
-supports and includes "per-function" passes which just operate on a single
-function at a time, without looking at other functions.  For more information
-on passes and how they are run, see the <a href="../WritingAnLLVMPass.html">How
-to Write a Pass</a> document and the <a href="../Passes.html">List of LLVM
-Passes</a>.</p>
-
-<p>For Kaleidoscope, we are currently generating functions on the fly, one at
-a time, as the user types them in.  We aren't shooting for the ultimate
-optimization experience in this setting, but we also want to catch the easy and
-quick stuff where possible.  As such, we will choose to run a few per-function
-optimizations as the user types the function in.  If we wanted to make a "static
-Kaleidoscope compiler", we would use exactly the code we have now, except that
-we would defer running the optimizer until the entire file has been parsed.</p>
-
-<p>In order to get per-function optimizations going, we need to set up a
-<a href="../WritingAnLLVMPass.html#passmanager">Llvm.PassManager</a> to hold and
-organize the LLVM optimizations that we want to run.  Once we have that, we can
-add a set of optimizations to run.  The code looks like this:</p>
-
-<div class="doc_code">
-<pre>
-  (* Create the JIT. *)
-  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
-  let the_fpm = PassManager.create_function Codegen.the_module in
-
-  (* Set up the optimizer pipeline.  Start with registering info about how the
-   * target lays out data structures. *)
-  DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
-  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
-  add_instruction_combining the_fpm;
-
-  (* reassociate expressions. *)
-  add_reassociation the_fpm;
-
-  (* Eliminate Common SubExpressions. *)
-  add_gvn the_fpm;
-
-  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
-  add_cfg_simplification the_fpm;
-
-  ignore (PassManager.initialize the_fpm);
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop the_fpm the_execution_engine stream;
-</pre>
-</div>
-
-<p>The meat of the matter here, is the definition of "<tt>the_fpm</tt>".  It
-requires a pointer to the <tt>the_module</tt> to construct itself.  Once it is
-set up, we use a series of "add" calls to add a bunch of LLVM passes.  The
-first pass is basically boilerplate, it adds a pass so that later optimizations
-know how the data structures in the program are laid out.  The
-"<tt>the_execution_engine</tt>" variable is related to the JIT, which we will
-get to in the next section.</p>
-
-<p>In this case, we choose to add 4 optimization passes.  The passes we chose
-here are a pretty standard set of "cleanup" optimizations that are useful for
-a wide variety of code.  I won't delve into what they do but, believe me,
-they are a good starting place :).</p>
-
-<p>Once the <tt>Llvm.PassManager.</tt> is set up, we need to make use of it.
-We do this by running it after our newly created function is constructed (in
-<tt>Codegen.codegen_func</tt>), but before it is returned to the client:</p>
-
-<div class="doc_code">
-<pre>
-let codegen_func the_fpm = function
-      ...
-      try
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        (* Optimize the function. *)
-        let _ = PassManager.run_function the_function the_fpm in
-
-        the_function
-</pre>
-</div>
-
-<p>As you can see, this is pretty straightforward.  The <tt>the_fpm</tt>
-optimizes and updates the LLVM Function* in place, improving (hopefully) its
-body.  With this in place, we can try our test above again:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def test(x) (1+2+x)*(x+(1+2));</b>
-ready&gt; Read function definition:
-define double @test(double %x) {
-entry:
-        %addtmp = fadd double %x, 3.000000e+00
-        %multmp = fmul double %addtmp, %addtmp
-        ret double %multmp
-}
-</pre>
-</div>
-
-<p>As expected, we now get our nicely optimized code, saving a floating point
-add instruction from every execution of this function.</p>
-
-<p>LLVM provides a wide variety of optimizations that can be used in certain
-circumstances.  Some <a href="../Passes.html">documentation about the various
-passes</a> is available, but it isn't very complete.  Another good source of
-ideas can come from looking at the passes that <tt>Clang</tt> runs to get
-started.  The "<tt>opt</tt>" tool allows you to experiment with passes from the
-command line, so you can see if they do anything.</p>
-
-<p>Now that we have reasonable code coming out of our front-end, lets talk about
-executing it!</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="jit">Adding a JIT Compiler</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Code that is available in LLVM IR can have a wide variety of tools
-applied to it.  For example, you can run optimizations on it (as we did above),
-you can dump it out in textual or binary forms, you can compile the code to an
-assembly file (.s) for some target, or you can JIT compile it.  The nice thing
-about the LLVM IR representation is that it is the "common currency" between
-many different parts of the compiler.
-</p>
-
-<p>In this section, we'll add JIT compiler support to our interpreter.  The
-basic idea that we want for Kaleidoscope is to have the user enter function
-bodies as they do now, but immediately evaluate the top-level expressions they
-type in.  For example, if they type in "1 + 2;", we should evaluate and print
-out 3.  If they define a function, they should be able to call it from the
-command line.</p>
-
-<p>In order to do this, we first declare and initialize the JIT.  This is done
-by adding a global variable and a call in <tt>main</tt>:</p>
-
-<div class="doc_code">
-<pre>
-...
-let main () =
-  ...
-  <b>(* Create the JIT. *)
-  let the_execution_engine = ExecutionEngine.create Codegen.the_module in</b>
-  ...
-</pre>
-</div>
-
-<p>This creates an abstract "Execution Engine" which can be either a JIT
-compiler or the LLVM interpreter.  LLVM will automatically pick a JIT compiler
-for you if one is available for your platform, otherwise it will fall back to
-the interpreter.</p>
-
-<p>Once the <tt>Llvm_executionengine.ExecutionEngine.t</tt> is created, the JIT
-is ready to be used.  There are a variety of APIs that are useful, but the
-simplest one is the "<tt>Llvm_executionengine.ExecutionEngine.run_function</tt>"
-function.  This method JIT compiles the specified LLVM Function and returns a
-function pointer to the generated machine code.  In our case, this means that we
-can change the code that parses a top-level expression to look like this:</p>
-
-<div class="doc_code">
-<pre>
-            (* Evaluate a top-level expression into an anonymous function. *)
-            let e = Parser.parse_toplevel stream in
-            print_endline "parsed a top-level expr";
-            let the_function = Codegen.codegen_func the_fpm e in
-            dump_value the_function;
-
-            (* JIT the function, returning a function pointer. *)
-            let result = ExecutionEngine.run_function the_function [||]
-              the_execution_engine in
-
-            print_string "Evaluated to ";
-            print_float (GenericValue.as_float Codegen.double_type result);
-            print_newline ();
-</pre>
-</div>
-
-<p>Recall that we compile top-level expressions into a self-contained LLVM
-function that takes no arguments and returns the computed double.  Because the
-LLVM JIT compiler matches the native platform ABI, this means that you can just
-cast the result pointer to a function pointer of that type and call it directly.
-This means, there is no difference between JIT compiled code and native machine
-code that is statically linked into your application.</p>
-
-<p>With just these two changes, lets see how Kaleidoscope works now!</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>4+5;</b>
-define double @""() {
-entry:
-        ret double 9.000000e+00
-}
-
-<em>Evaluated to 9.000000</em>
-</pre>
-</div>
-
-<p>Well this looks like it is basically working.  The dump of the function
-shows the "no argument function that always returns double" that we synthesize
-for each top level expression that is typed in.  This demonstrates very basic
-functionality, but can we do more?</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>def testfunc(x y) x + y*2; </b>
-Read function definition:
-define double @testfunc(double %x, double %y) {
-entry:
-        %multmp = fmul double %y, 2.000000e+00
-        %addtmp = fadd double %multmp, %x
-        ret double %addtmp
-}
-
-ready&gt; <b>testfunc(4, 10);</b>
-define double @""() {
-entry:
-        %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
-        ret double %calltmp
-}
-
-<em>Evaluated to 24.000000</em>
-</pre>
-</div>
-
-<p>This illustrates that we can now call user code, but there is something a bit
-subtle going on here.  Note that we only invoke the JIT on the anonymous
-functions that <em>call testfunc</em>, but we never invoked it
-on <em>testfunc</em> itself.  What actually happened here is that the JIT
-scanned for all non-JIT'd functions transitively called from the anonymous
-function and compiled all of them before returning
-from <tt>run_function</tt>.</p>
-
-<p>The JIT provides a number of other more advanced interfaces for things like
-freeing allocated machine code, rejit'ing functions to update them, etc.
-However, even with this simple code, we get some surprisingly powerful
-capabilities - check this out (I removed the dump of the anonymous functions,
-you should get the idea by now :) :</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>extern sin(x);</b>
-Read extern:
-declare double @sin(double)
-
-ready&gt; <b>extern cos(x);</b>
-Read extern:
-declare double @cos(double)
-
-ready&gt; <b>sin(1.0);</b>
-<em>Evaluated to 0.841471</em>
-
-ready&gt; <b>def foo(x) sin(x)*sin(x) + cos(x)*cos(x);</b>
-Read function definition:
-define double @foo(double %x) {
-entry:
-        %calltmp = call double @sin(double %x)
-        %multmp = fmul double %calltmp, %calltmp
-        %calltmp2 = call double @cos(double %x)
-        %multmp4 = fmul double %calltmp2, %calltmp2
-        %addtmp = fadd double %multmp, %multmp4
-        ret double %addtmp
-}
-
-ready&gt; <b>foo(4.0);</b>
-<em>Evaluated to 1.000000</em>
-</pre>
-</div>
-
-<p>Whoa, how does the JIT know about sin and cos?  The answer is surprisingly
-simple: in this example, the JIT started execution of a function and got to a
-function call.  It realized that the function was not yet JIT compiled and
-invoked the standard set of routines to resolve the function.  In this case,
-there is no body defined for the function, so the JIT ended up calling
-"<tt>dlsym("sin")</tt>" on the Kaleidoscope process itself.  Since
-"<tt>sin</tt>" is defined within the JIT's address space, it simply patches up
-calls in the module to call the libm version of <tt>sin</tt> directly.</p>
-
-<p>The LLVM JIT provides a number of interfaces (look in the
-<tt>llvm_executionengine.mli</tt> file) for controlling how unknown functions
-get resolved.  It allows you to establish explicit mappings between IR objects
-and addresses (useful for LLVM global variables that you want to map to static
-tables, for example), allows you to dynamically decide on the fly based on the
-function name, and even allows you to have the JIT compile functions lazily the
-first time they're called.</p>
-
-<p>One interesting application of this is that we can now extend the language
-by writing arbitrary C code to implement operations.  For example, if we add:
-</p>
-
-<div class="doc_code">
-<pre>
-/* putchard - putchar that takes a double and returns 0. */
-extern "C"
-double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-</pre>
-</div>
-
-<p>Now we can produce simple output to the console by using things like:
-"<tt>extern putchard(x); putchard(120);</tt>", which prints a lowercase 'x' on
-the console (120 is the ASCII code for 'x').  Similar code could be used to
-implement file I/O, console input, and many other capabilities in
-Kaleidoscope.</p>
-
-<p>This completes the JIT and optimizer chapter of the Kaleidoscope tutorial. At
-this point, we can compile a non-Turing-complete programming language, optimize
-and JIT compile it in a user-driven way.  Next up we'll look into <a
-href="OCamlLangImpl5.html">extending the language with control flow
-constructs</a>, tackling some interesting LLVM IR issues along the way.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-LLVM JIT and optimizer.  To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<dl>
-<dt>_tags:</dt>
-<dd class="doc_code">
-<pre>
-&lt;{lexer,parser}.ml&gt;: use_camlp4, pp(camlp4of)
-&lt;*.{byte,native}&gt;: g++, use_llvm, use_llvm_analysis
-&lt;*.{byte,native}&gt;: use_llvm_executionengine, use_llvm_target
-&lt;*.{byte,native}&gt;: use_llvm_scalar_opts, use_bindings
-</pre>
-</dd>
-
-<dt>myocamlbuild.ml:</dt>
-<dd class="doc_code">
-<pre>
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-ocaml_lib ~extern:true "llvm_executionengine";;
-ocaml_lib ~extern:true "llvm_target";;
-ocaml_lib ~extern:true "llvm_scalar_opts";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
-dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
-</pre>
-</dd>
-
-<dt>token.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
-  (* commands *)
-  | Def | Extern
-
-  (* primary *)
-  | Ident of string | Number of float
-
-  (* unknown *)
-  | Kwd of char
-</pre>
-</dd>
-
-<dt>lexer.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-  (* number: [0-9.]+ *)
-  | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-  (* Comment until end of line. *)
-  | [&lt; ' ('#'); stream &gt;] -&gt;
-      lex_comment stream
-
-  (* Otherwise, just return the character as its ascii value. *)
-  | [&lt; 'c; stream &gt;] -&gt;
-      [&lt; 'Token.Kwd c; lex stream &gt;]
-
-  (* end of stream. *)
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-
-and lex_number buffer = parser
-  | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-
-and lex_ident buffer = parser
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      match Buffer.contents buffer with
-      | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
-      | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
-      | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-
-and lex_comment = parser
-  | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
-  | [&lt; 'c; e=lex_comment &gt;] -&gt; e
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</dd>
-
-<dt>ast.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a binary operator. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls. *)
-  | Call of string * expr array
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</dd>
-
-<dt>parser.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr *)
-let rec parse_primary = parser
-  (* numberexpr ::= number *)
-  | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [&lt; 'Token.Ident id; stream &gt;] -&gt;
-      let rec parse_args accumulator = parser
-        | [&lt; e=parse_expr; stream &gt;] -&gt;
-            begin parser
-              | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
-              | [&lt; &gt;] -&gt; e :: accumulator
-            end stream
-        | [&lt; &gt;] -&gt; accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [&lt; 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [&lt; &gt;] -&gt; Ast.Variable id
-      in
-      parse_ident id stream
-
-  | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec &lt; expr_prec then lhs else begin
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Parse the primary expression after the binary operator. *)
-        let rhs = parse_primary stream in
-
-        (* Okay, we know this is a binop. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) -&gt;
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec &lt; next_prec
-              then parse_bin_rhs (token_prec + 1) rhs stream
-              else rhs
-          | _ -&gt; rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-  | _ -&gt; lhs
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [&lt; lhs=parse_primary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-
-(* prototype
- *   ::= id '(' id* ')' *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
-    | [&lt; &gt;] -&gt; accumulator
-  in
-
-  parser
-  | [&lt; 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-
-  | [&lt; &gt;] -&gt;
-      raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
-  | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
-      Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
-  | [&lt; e=parse_expr &gt;] -&gt;
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-(*  external ::= 'extern' prototype *)
-let parse_extern = parser
-  | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</dd>
-
-<dt>codegen.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-let rec codegen_expr = function
-  | Ast.Number n -&gt; const_float double_type n
-  | Ast.Variable name -&gt;
-      (try Hashtbl.find named_values name with
-        | Not_found -&gt; raise (Error "unknown variable name"))
-  | Ast.Binary (op, lhs, rhs) -&gt;
-      let lhs_val = codegen_expr lhs in
-      let rhs_val = codegen_expr rhs in
-      begin
-        match op with
-        | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
-        | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
-        | '&lt;' -&gt;
-            (* Convert bool 0/1 to double 0.0 or 1.0 *)
-            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-            build_uitofp i double_type "booltmp" builder
-        | _ -&gt; raise (Error "invalid binary operator")
-      end
-  | Ast.Call (callee, args) -&gt;
-      (* Look up the name in the module table. *)
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -&gt; callee
-        | None -&gt; raise (Error "unknown function referenced")
-      in
-      let params = params callee in
-
-      (* If argument mismatch error. *)
-      if Array.length params == Array.length args then () else
-        raise (Error "incorrect # arguments passed");
-      let args = Array.map codegen_expr args in
-      build_call callee args "calltmp" builder
-
-let codegen_proto = function
-  | Ast.Prototype (name, args) -&gt;
-      (* Make the function type: double(double,double) etc. *)
-      let doubles = Array.make (Array.length args) double_type in
-      let ft = function_type double_type doubles in
-      let f =
-        match lookup_function name the_module with
-        | None -&gt; declare_function name ft the_module
-
-        (* If 'f' conflicted, there was already something named 'name'. If it
-         * has a body, don't allow redefinition or reextern. *)
-        | Some f -&gt;
-            (* If 'f' already has a body, reject this. *)
-            if block_begin f &lt;&gt; At_end f then
-              raise (Error "redefinition of function");
-
-            (* If 'f' took a different number of arguments, reject. *)
-            if element_type (type_of f) &lt;&gt; ft then
-              raise (Error "redefinition of function with different # args");
-            f
-      in
-
-      (* Set names for all arguments. *)
-      Array.iteri (fun i a -&gt;
-        let n = args.(i) in
-        set_value_name n a;
-        Hashtbl.add named_values n a;
-      ) (params f);
-      f
-
-let codegen_func the_fpm = function
-  | Ast.Function (proto, body) -&gt;
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-
-      try
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        (* Optimize the function. *)
-        let _ = PassManager.run_function the_function the_fpm in
-
-        the_function
-      with e -&gt;
-        delete_function the_function;
-        raise e
-</pre>
-</dd>
-
-<dt>toplevel.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop the_fpm the_execution_engine stream =
-  match Stream.peek stream with
-  | None -&gt; ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') -&gt;
-      Stream.junk stream;
-      main_loop the_fpm the_execution_engine stream
-
-  | Some token -&gt;
-      begin
-        try match token with
-        | Token.Def -&gt;
-            let e = Parser.parse_definition stream in
-            print_endline "parsed a function definition.";
-            dump_value (Codegen.codegen_func the_fpm e);
-        | Token.Extern -&gt;
-            let e = Parser.parse_extern stream in
-            print_endline "parsed an extern.";
-            dump_value (Codegen.codegen_proto e);
-        | _ -&gt;
-            (* Evaluate a top-level expression into an anonymous function. *)
-            let e = Parser.parse_toplevel stream in
-            print_endline "parsed a top-level expr";
-            let the_function = Codegen.codegen_func the_fpm e in
-            dump_value the_function;
-
-            (* JIT the function, returning a function pointer. *)
-            let result = ExecutionEngine.run_function the_function [||]
-              the_execution_engine in
-
-            print_string "Evaluated to ";
-            print_float (GenericValue.as_float Codegen.double_type result);
-            print_newline ();
-        with Stream.Error s | Codegen.Error s -&gt;
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready&gt; "; flush stdout;
-      main_loop the_fpm the_execution_engine stream
-</pre>
-</dd>
-
-<dt>toy.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-open Llvm_scalar_opts
-
-let main () =
-  ignore (initialize_native_target ());
-
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '&lt;' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-
-  (* Prime the first token. *)
-  print_string "ready&gt; "; flush stdout;
-  let stream = Lexer.lex (Stream.of_channel stdin) in
-
-  (* Create the JIT. *)
-  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
-  let the_fpm = PassManager.create_function Codegen.the_module in
-
-  (* Set up the optimizer pipeline.  Start with registering info about how the
-   * target lays out data structures. *)
-  DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
-  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
-  add_instruction_combination the_fpm;
-
-  (* reassociate expressions. *)
-  add_reassociation the_fpm;
-
-  (* Eliminate Common SubExpressions. *)
-  add_gvn the_fpm;
-
-  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
-  add_cfg_simplification the_fpm;
-
-  ignore (PassManager.initialize the_fpm);
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop the_fpm the_execution_engine stream;
-
-  (* Print out all the generated code. *)
-  dump_module Codegen.the_module
-;;
-
-main ()
-</pre>
-</dd>
-
-<dt>bindings.c</dt>
-<dd class="doc_code">
-<pre>
-#include &lt;stdio.h&gt;
-
-/* putchard - putchar that takes a double and returns 0. */
-extern double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-</pre>
-</dd>
-</dl>
-
-<a href="OCamlLangImpl5.html">Next: Extending the language: control flow</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl4.rst b/docs/tutorial/OCamlLangImpl4.rst
new file mode 100644
index 0000000000..865a03dfb7
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl4.rst
@@ -0,0 +1,918 @@
+==============================================
+Kaleidoscope: Adding JIT and Optimizer Support
+==============================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_ and `Erick
+Tryzelaar <mailto:idadesub@users.sourceforge.net>`_
+
+Chapter 4 Introduction
+======================
+
+Welcome to Chapter 4 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. Chapters 1-3 described the implementation
+of a simple language and added support for generating LLVM IR. This
+chapter describes two new techniques: adding optimizer support to your
+language, and adding JIT compiler support. These additions will
+demonstrate how to get nice, efficient code for the Kaleidoscope
+language.
+
+Trivial Constant Folding
+========================
+
+**Note:** the default ``IRBuilder`` now always includes the constant
+folding optimisations below.
+
+Our demonstration for Chapter 3 is elegant and easy to extend.
+Unfortunately, it does not produce wonderful code. For example, when
+compiling simple code, we don't get obvious optimizations:
+
+::
+
+    ready> def test(x) 1+2+x;
+    Read function definition:
+    define double @test(double %x) {
+    entry:
+            %addtmp = fadd double 1.000000e+00, 2.000000e+00
+            %addtmp1 = fadd double %addtmp, %x
+            ret double %addtmp1
+    }
+
+This code is a very, very literal transcription of the AST built by
+parsing the input. As such, this transcription lacks optimizations like
+constant folding (we'd like to get "``add x, 3.0``" in the example
+above) as well as other more important optimizations. Constant folding,
+in particular, is a very common and very important optimization: so much
+so that many language implementors implement constant folding support in
+their AST representation.
+
+With LLVM, you don't need this support in the AST. Since all calls to
+build LLVM IR go through the LLVM builder, it would be nice if the
+builder itself checked to see if there was a constant folding
+opportunity when you call it. If so, it could just do the constant fold
+and return the constant instead of creating an instruction. This is
+exactly what the ``LLVMFoldingBuilder`` class does.
+
+All we did was switch from ``LLVMBuilder`` to ``LLVMFoldingBuilder``.
+Though we change no other code, we now have all of our instructions
+implicitly constant folded without us having to do anything about it.
+For example, the input above now compiles to:
+
+::
+
+    ready> def test(x) 1+2+x;
+    Read function definition:
+    define double @test(double %x) {
+    entry:
+            %addtmp = fadd double 3.000000e+00, %x
+            ret double %addtmp
+    }
+
+Well, that was easy :). In practice, we recommend always using
+``LLVMFoldingBuilder`` when generating code like this. It has no
+"syntactic overhead" for its use (you don't have to uglify your compiler
+with constant checks everywhere) and it can dramatically reduce the
+amount of LLVM IR that is generated in some cases (particular for
+languages with a macro preprocessor or that use a lot of constants).
+
+On the other hand, the ``LLVMFoldingBuilder`` is limited by the fact
+that it does all of its analysis inline with the code as it is built. If
+you take a slightly more complex example:
+
+::
+
+    ready> def test(x) (1+2+x)*(x+(1+2));
+    ready> Read function definition:
+    define double @test(double %x) {
+    entry:
+            %addtmp = fadd double 3.000000e+00, %x
+            %addtmp1 = fadd double %x, 3.000000e+00
+            %multmp = fmul double %addtmp, %addtmp1
+            ret double %multmp
+    }
+
+In this case, the LHS and RHS of the multiplication are the same value.
+We'd really like to see this generate "``tmp = x+3; result = tmp*tmp;``"
+instead of computing "``x*3``" twice.
+
+Unfortunately, no amount of local analysis will be able to detect and
+correct this. This requires two transformations: reassociation of
+expressions (to make the add's lexically identical) and Common
+Subexpression Elimination (CSE) to delete the redundant add instruction.
+Fortunately, LLVM provides a broad range of optimizations that you can
+use, in the form of "passes".
+
+LLVM Optimization Passes
+========================
+
+LLVM provides many optimization passes, which do many different sorts of
+things and have different tradeoffs. Unlike other systems, LLVM doesn't
+hold to the mistaken notion that one set of optimizations is right for
+all languages and for all situations. LLVM allows a compiler implementor
+to make complete decisions about what optimizations to use, in which
+order, and in what situation.
+
+As a concrete example, LLVM supports both "whole module" passes, which
+look across as large of body of code as they can (often a whole file,
+but if run at link time, this can be a substantial portion of the whole
+program). It also supports and includes "per-function" passes which just
+operate on a single function at a time, without looking at other
+functions. For more information on passes and how they are run, see the
+`How to Write a Pass <../WritingAnLLVMPass.html>`_ document and the
+`List of LLVM Passes <../Passes.html>`_.
+
+For Kaleidoscope, we are currently generating functions on the fly, one
+at a time, as the user types them in. We aren't shooting for the
+ultimate optimization experience in this setting, but we also want to
+catch the easy and quick stuff where possible. As such, we will choose
+to run a few per-function optimizations as the user types the function
+in. If we wanted to make a "static Kaleidoscope compiler", we would use
+exactly the code we have now, except that we would defer running the
+optimizer until the entire file has been parsed.
+
+In order to get per-function optimizations going, we need to set up a
+`Llvm.PassManager <../WritingAnLLVMPass.html#passmanager>`_ to hold and
+organize the LLVM optimizations that we want to run. Once we have that,
+we can add a set of optimizations to run. The code looks like this:
+
+.. code-block:: ocaml
+
+      (* Create the JIT. *)
+      let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+      let the_fpm = PassManager.create_function Codegen.the_module in
+
+      (* Set up the optimizer pipeline.  Start with registering info about how the
+       * target lays out data structures. *)
+      DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+      (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+      add_instruction_combining the_fpm;
+
+      (* reassociate expressions. *)
+      add_reassociation the_fpm;
+
+      (* Eliminate Common SubExpressions. *)
+      add_gvn the_fpm;
+
+      (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+      add_cfg_simplification the_fpm;
+
+      ignore (PassManager.initialize the_fpm);
+
+      (* Run the main "interpreter loop" now. *)
+      Toplevel.main_loop the_fpm the_execution_engine stream;
+
+The meat of the matter here, is the definition of "``the_fpm``". It
+requires a pointer to the ``the_module`` to construct itself. Once it is
+set up, we use a series of "add" calls to add a bunch of LLVM passes.
+The first pass is basically boilerplate, it adds a pass so that later
+optimizations know how the data structures in the program are laid out.
+The "``the_execution_engine``" variable is related to the JIT, which we
+will get to in the next section.
+
+In this case, we choose to add 4 optimization passes. The passes we
+chose here are a pretty standard set of "cleanup" optimizations that are
+useful for a wide variety of code. I won't delve into what they do but,
+believe me, they are a good starting place :).
+
+Once the ``Llvm.PassManager.`` is set up, we need to make use of it. We
+do this by running it after our newly created function is constructed
+(in ``Codegen.codegen_func``), but before it is returned to the client:
+
+.. code-block:: ocaml
+
+    let codegen_func the_fpm = function
+          ...
+          try
+            let ret_val = codegen_expr body in
+
+            (* Finish off the function. *)
+            let _ = build_ret ret_val builder in
+
+            (* Validate the generated code, checking for consistency. *)
+            Llvm_analysis.assert_valid_function the_function;
+
+            (* Optimize the function. *)
+            let _ = PassManager.run_function the_function the_fpm in
+
+            the_function
+
+As you can see, this is pretty straightforward. The ``the_fpm``
+optimizes and updates the LLVM Function\* in place, improving
+(hopefully) its body. With this in place, we can try our test above
+again:
+
+::
+
+    ready> def test(x) (1+2+x)*(x+(1+2));
+    ready> Read function definition:
+    define double @test(double %x) {
+    entry:
+            %addtmp = fadd double %x, 3.000000e+00
+            %multmp = fmul double %addtmp, %addtmp
+            ret double %multmp
+    }
+
+As expected, we now get our nicely optimized code, saving a floating
+point add instruction from every execution of this function.
+
+LLVM provides a wide variety of optimizations that can be used in
+certain circumstances. Some `documentation about the various
+passes <../Passes.html>`_ is available, but it isn't very complete.
+Another good source of ideas can come from looking at the passes that
+``Clang`` runs to get started. The "``opt``" tool allows you to
+experiment with passes from the command line, so you can see if they do
+anything.
+
+Now that we have reasonable code coming out of our front-end, lets talk
+about executing it!
+
+Adding a JIT Compiler
+=====================
+
+Code that is available in LLVM IR can have a wide variety of tools
+applied to it. For example, you can run optimizations on it (as we did
+above), you can dump it out in textual or binary forms, you can compile
+the code to an assembly file (.s) for some target, or you can JIT
+compile it. The nice thing about the LLVM IR representation is that it
+is the "common currency" between many different parts of the compiler.
+
+In this section, we'll add JIT compiler support to our interpreter. The
+basic idea that we want for Kaleidoscope is to have the user enter
+function bodies as they do now, but immediately evaluate the top-level
+expressions they type in. For example, if they type in "1 + 2;", we
+should evaluate and print out 3. If they define a function, they should
+be able to call it from the command line.
+
+In order to do this, we first declare and initialize the JIT. This is
+done by adding a global variable and a call in ``main``:
+
+.. code-block:: ocaml
+
+    ...
+    let main () =
+      ...
+      (* Create the JIT. *)
+      let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+      ...
+
+This creates an abstract "Execution Engine" which can be either a JIT
+compiler or the LLVM interpreter. LLVM will automatically pick a JIT
+compiler for you if one is available for your platform, otherwise it
+will fall back to the interpreter.
+
+Once the ``Llvm_executionengine.ExecutionEngine.t`` is created, the JIT
+is ready to be used. There are a variety of APIs that are useful, but
+the simplest one is the
+"``Llvm_executionengine.ExecutionEngine.run_function``" function. This
+method JIT compiles the specified LLVM Function and returns a function
+pointer to the generated machine code. In our case, this means that we
+can change the code that parses a top-level expression to look like
+this:
+
+.. code-block:: ocaml
+
+                (* Evaluate a top-level expression into an anonymous function. *)
+                let e = Parser.parse_toplevel stream in
+                print_endline "parsed a top-level expr";
+                let the_function = Codegen.codegen_func the_fpm e in
+                dump_value the_function;
+
+                (* JIT the function, returning a function pointer. *)
+                let result = ExecutionEngine.run_function the_function [||]
+                  the_execution_engine in
+
+                print_string "Evaluated to ";
+                print_float (GenericValue.as_float Codegen.double_type result);
+                print_newline ();
+
+Recall that we compile top-level expressions into a self-contained LLVM
+function that takes no arguments and returns the computed double.
+Because the LLVM JIT compiler matches the native platform ABI, this
+means that you can just cast the result pointer to a function pointer of
+that type and call it directly. This means, there is no difference
+between JIT compiled code and native machine code that is statically
+linked into your application.
+
+With just these two changes, lets see how Kaleidoscope works now!
+
+::
+
+    ready> 4+5;
+    define double @""() {
+    entry:
+            ret double 9.000000e+00
+    }
+
+    Evaluated to 9.000000
+
+Well this looks like it is basically working. The dump of the function
+shows the "no argument function that always returns double" that we
+synthesize for each top level expression that is typed in. This
+demonstrates very basic functionality, but can we do more?
+
+::
+
+    ready> def testfunc(x y) x + y*2;
+    Read function definition:
+    define double @testfunc(double %x, double %y) {
+    entry:
+            %multmp = fmul double %y, 2.000000e+00
+            %addtmp = fadd double %multmp, %x
+            ret double %addtmp
+    }
+
+    ready> testfunc(4, 10);
+    define double @""() {
+    entry:
+            %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
+            ret double %calltmp
+    }
+
+    Evaluated to 24.000000
+
+This illustrates that we can now call user code, but there is something
+a bit subtle going on here. Note that we only invoke the JIT on the
+anonymous functions that *call testfunc*, but we never invoked it on
+*testfunc* itself. What actually happened here is that the JIT scanned
+for all non-JIT'd functions transitively called from the anonymous
+function and compiled all of them before returning from
+``run_function``.
+
+The JIT provides a number of other more advanced interfaces for things
+like freeing allocated machine code, rejit'ing functions to update them,
+etc. However, even with this simple code, we get some surprisingly
+powerful capabilities - check this out (I removed the dump of the
+anonymous functions, you should get the idea by now :) :
+
+::
+
+    ready> extern sin(x);
+    Read extern:
+    declare double @sin(double)
+
+    ready> extern cos(x);
+    Read extern:
+    declare double @cos(double)
+
+    ready> sin(1.0);
+    Evaluated to 0.841471
+
+    ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x);
+    Read function definition:
+    define double @foo(double %x) {
+    entry:
+            %calltmp = call double @sin(double %x)
+            %multmp = fmul double %calltmp, %calltmp
+            %calltmp2 = call double @cos(double %x)
+            %multmp4 = fmul double %calltmp2, %calltmp2
+            %addtmp = fadd double %multmp, %multmp4
+            ret double %addtmp
+    }
+
+    ready> foo(4.0);
+    Evaluated to 1.000000
+
+Whoa, how does the JIT know about sin and cos? The answer is
+surprisingly simple: in this example, the JIT started execution of a
+function and got to a function call. It realized that the function was
+not yet JIT compiled and invoked the standard set of routines to resolve
+the function. In this case, there is no body defined for the function,
+so the JIT ended up calling "``dlsym("sin")``" on the Kaleidoscope
+process itself. Since "``sin``" is defined within the JIT's address
+space, it simply patches up calls in the module to call the libm version
+of ``sin`` directly.
+
+The LLVM JIT provides a number of interfaces (look in the
+``llvm_executionengine.mli`` file) for controlling how unknown functions
+get resolved. It allows you to establish explicit mappings between IR
+objects and addresses (useful for LLVM global variables that you want to
+map to static tables, for example), allows you to dynamically decide on
+the fly based on the function name, and even allows you to have the JIT
+compile functions lazily the first time they're called.
+
+One interesting application of this is that we can now extend the
+language by writing arbitrary C code to implement operations. For
+example, if we add:
+
+.. code-block:: c++
+
+    /* putchard - putchar that takes a double and returns 0. */
+    extern "C"
+    double putchard(double X) {
+      putchar((char)X);
+      return 0;
+    }
+
+Now we can produce simple output to the console by using things like:
+"``extern putchard(x); putchard(120);``", which prints a lowercase 'x'
+on the console (120 is the ASCII code for 'x'). Similar code could be
+used to implement file I/O, console input, and many other capabilities
+in Kaleidoscope.
+
+This completes the JIT and optimizer chapter of the Kaleidoscope
+tutorial. At this point, we can compile a non-Turing-complete
+programming language, optimize and JIT compile it in a user-driven way.
+Next up we'll look into `extending the language with control flow
+constructs <OCamlLangImpl5.html>`_, tackling some interesting LLVM IR
+issues along the way.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the LLVM JIT and optimizer. To build this example, use:
+
+.. code-block:: bash
+
+    # Compile
+    ocamlbuild toy.byte
+    # Run
+    ./toy.byte
+
+Here is the code:
+
+\_tags:
+    ::
+
+        <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+        <*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+        <*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+        <*.{byte,native}>: use_llvm_scalar_opts, use_bindings
+
+myocamlbuild.ml:
+    .. code-block:: ocaml
+
+        open Ocamlbuild_plugin;;
+
+        ocaml_lib ~extern:true "llvm";;
+        ocaml_lib ~extern:true "llvm_analysis";;
+        ocaml_lib ~extern:true "llvm_executionengine";;
+        ocaml_lib ~extern:true "llvm_target";;
+        ocaml_lib ~extern:true "llvm_scalar_opts";;
+
+        flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
+        dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
+
+token.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Lexer Tokens
+         *===----------------------------------------------------------------------===*)
+
+        (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
+         * these others for known things. *)
+        type token =
+          (* commands *)
+          | Def | Extern
+
+          (* primary *)
+          | Ident of string | Number of float
+
+          (* unknown *)
+          | Kwd of char
+
+lexer.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Lexer
+         *===----------------------------------------------------------------------===*)
+
+        let rec lex = parser
+          (* Skip any whitespace. *)
+          | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+          (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+          | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+              let buffer = Buffer.create 1 in
+              Buffer.add_char buffer c;
+              lex_ident buffer stream
+
+          (* number: [0-9.]+ *)
+          | [< ' ('0' .. '9' as c); stream >] ->
+              let buffer = Buffer.create 1 in
+              Buffer.add_char buffer c;
+              lex_number buffer stream
+
+          (* Comment until end of line. *)
+          | [< ' ('#'); stream >] ->
+              lex_comment stream
+
+          (* Otherwise, just return the character as its ascii value. *)
+          | [< 'c; stream >] ->
+              [< 'Token.Kwd c; lex stream >]
+
+          (* end of stream. *)
+          | [< >] -> [< >]
+
+        and lex_number buffer = parser
+          | [< ' ('0' .. '9' | '.' as c); stream >] ->
+              Buffer.add_char buffer c;
+              lex_number buffer stream
+          | [< stream=lex >] ->
+              [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+        and lex_ident buffer = parser
+          | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+              Buffer.add_char buffer c;
+              lex_ident buffer stream
+          | [< stream=lex >] ->
+              match Buffer.contents buffer with
+              | "def" -> [< 'Token.Def; stream >]
+              | "extern" -> [< 'Token.Extern; stream >]
+              | id -> [< 'Token.Ident id; stream >]
+
+        and lex_comment = parser
+          | [< ' ('\n'); stream=lex >] -> stream
+          | [< 'c; e=lex_comment >] -> e
+          | [< >] -> [< >]
+
+ast.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Abstract Syntax Tree (aka Parse Tree)
+         *===----------------------------------------------------------------------===*)
+
+        (* expr - Base type for all expression nodes. *)
+        type expr =
+          (* variant for numeric literals like "1.0". *)
+          | Number of float
+
+          (* variant for referencing a variable, like "a". *)
+          | Variable of string
+
+          (* variant for a binary operator. *)
+          | Binary of char * expr * expr
+
+          (* variant for function calls. *)
+          | Call of string * expr array
+
+        (* proto - This type represents the "prototype" for a function, which captures
+         * its name, and its argument names (thus implicitly the number of arguments the
+         * function takes). *)
+        type proto = Prototype of string * string array
+
+        (* func - This type represents a function definition itself. *)
+        type func = Function of proto * expr
+
+parser.ml:
+    .. code-block:: ocaml
+
+        (*===---------------------------------------------------------------------===
+         * Parser
+         *===---------------------------------------------------------------------===*)
+
+        (* binop_precedence - This holds the precedence for each binary operator that is
+         * defined *)
+        let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+        (* precedence - Get the precedence of the pending binary operator token. *)
+        let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+        (* primary
+         *   ::= identifier
+         *   ::= numberexpr
+         *   ::= parenexpr *)
+        let rec parse_primary = parser
+          (* numberexpr ::= number *)
+          | [< 'Token.Number n >] -> Ast.Number n
+
+          (* parenexpr ::= '(' expression ')' *)
+          | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+          (* identifierexpr
+           *   ::= identifier
+           *   ::= identifier '(' argumentexpr ')' *)
+          | [< 'Token.Ident id; stream >] ->
+              let rec parse_args accumulator = parser
+                | [< e=parse_expr; stream >] ->
+                    begin parser
+                      | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+                      | [< >] -> e :: accumulator
+                    end stream
+                | [< >] -> accumulator
+              in
+              let rec parse_ident id = parser
+                (* Call. *)
+                | [< 'Token.Kwd '(';
+                     args=parse_args [];
+                     'Token.Kwd ')' ?? "expected ')'">] ->
+                    Ast.Call (id, Array.of_list (List.rev args))
+
+                (* Simple variable ref. *)
+                | [< >] -> Ast.Variable id
+              in
+              parse_ident id stream
+
+          | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+        (* binoprhs
+         *   ::= ('+' primary)* *)
+        and parse_bin_rhs expr_prec lhs stream =
+          match Stream.peek stream with
+          (* If this is a binop, find its precedence. *)
+          | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+              let token_prec = precedence c in
+
+              (* If this is a binop that binds at least as tightly as the current binop,
+               * consume it, otherwise we are done. *)
+              if token_prec < expr_prec then lhs else begin
+                (* Eat the binop. *)
+                Stream.junk stream;
+
+                (* Parse the primary expression after the binary operator. *)
+                let rhs = parse_primary stream in
+
+                (* Okay, we know this is a binop. *)
+                let rhs =
+                  match Stream.peek stream with
+                  | Some (Token.Kwd c2) ->
+                      (* If BinOp binds less tightly with rhs than the operator after
+                       * rhs, let the pending operator take rhs as its lhs. *)
+                      let next_prec = precedence c2 in
+                      if token_prec < next_prec
+                      then parse_bin_rhs (token_prec + 1) rhs stream
+                      else rhs
+                  | _ -> rhs
+                in
+
+                (* Merge lhs/rhs. *)
+                let lhs = Ast.Binary (c, lhs, rhs) in
+                parse_bin_rhs expr_prec lhs stream
+              end
+          | _ -> lhs
+
+        (* expression
+         *   ::= primary binoprhs *)
+        and parse_expr = parser
+          | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+        (* prototype
+         *   ::= id '(' id* ')' *)
+        let parse_prototype =
+          let rec parse_args accumulator = parser
+            | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+            | [< >] -> accumulator
+          in
+
+          parser
+          | [< 'Token.Ident id;
+               'Token.Kwd '(' ?? "expected '(' in prototype";
+               args=parse_args [];
+               'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+              (* success. *)
+              Ast.Prototype (id, Array.of_list (List.rev args))
+
+          | [< >] ->
+              raise (Stream.Error "expected function name in prototype")
+
+        (* definition ::= 'def' prototype expression *)
+        let parse_definition = parser
+          | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+              Ast.Function (p, e)
+
+        (* toplevelexpr ::= expression *)
+        let parse_toplevel = parser
+          | [< e=parse_expr >] ->
+              (* Make an anonymous proto. *)
+              Ast.Function (Ast.Prototype ("", [||]), e)
+
+        (*  external ::= 'extern' prototype *)
+        let parse_extern = parser
+          | [< 'Token.Extern; e=parse_prototype >] -> e
+
+codegen.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Code Generation
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+
+        exception Error of string
+
+        let context = global_context ()
+        let the_module = create_module context "my cool jit"
+        let builder = builder context
+        let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+        let double_type = double_type context
+
+        let rec codegen_expr = function
+          | Ast.Number n -> const_float double_type n
+          | Ast.Variable name ->
+              (try Hashtbl.find named_values name with
+                | Not_found -> raise (Error "unknown variable name"))
+          | Ast.Binary (op, lhs, rhs) ->
+              let lhs_val = codegen_expr lhs in
+              let rhs_val = codegen_expr rhs in
+              begin
+                match op with
+                | '+' -> build_add lhs_val rhs_val "addtmp" builder
+                | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+                | '*' -> build_mul lhs_val rhs_val "multmp" builder
+                | '<' ->
+                    (* Convert bool 0/1 to double 0.0 or 1.0 *)
+                    let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+                    build_uitofp i double_type "booltmp" builder
+                | _ -> raise (Error "invalid binary operator")
+              end
+          | Ast.Call (callee, args) ->
+              (* Look up the name in the module table. *)
+              let callee =
+                match lookup_function callee the_module with
+                | Some callee -> callee
+                | None -> raise (Error "unknown function referenced")
+              in
+              let params = params callee in
+
+              (* If argument mismatch error. *)
+              if Array.length params == Array.length args then () else
+                raise (Error "incorrect # arguments passed");
+              let args = Array.map codegen_expr args in
+              build_call callee args "calltmp" builder
+
+        let codegen_proto = function
+          | Ast.Prototype (name, args) ->
+              (* Make the function type: double(double,double) etc. *)
+              let doubles = Array.make (Array.length args) double_type in
+              let ft = function_type double_type doubles in
+              let f =
+                match lookup_function name the_module with
+                | None -> declare_function name ft the_module
+
+                (* If 'f' conflicted, there was already something named 'name'. If it
+                 * has a body, don't allow redefinition or reextern. *)
+                | Some f ->
+                    (* If 'f' already has a body, reject this. *)
+                    if block_begin f <> At_end f then
+                      raise (Error "redefinition of function");
+
+                    (* If 'f' took a different number of arguments, reject. *)
+                    if element_type (type_of f) <> ft then
+                      raise (Error "redefinition of function with different # args");
+                    f
+              in
+
+              (* Set names for all arguments. *)
+              Array.iteri (fun i a ->
+                let n = args.(i) in
+                set_value_name n a;
+                Hashtbl.add named_values n a;
+              ) (params f);
+              f
+
+        let codegen_func the_fpm = function
+          | Ast.Function (proto, body) ->
+              Hashtbl.clear named_values;
+              let the_function = codegen_proto proto in
+
+              (* Create a new basic block to start insertion into. *)
+              let bb = append_block context "entry" the_function in
+              position_at_end bb builder;
+
+              try
+                let ret_val = codegen_expr body in
+
+                (* Finish off the function. *)
+                let _ = build_ret ret_val builder in
+
+                (* Validate the generated code, checking for consistency. *)
+                Llvm_analysis.assert_valid_function the_function;
+
+                (* Optimize the function. *)
+                let _ = PassManager.run_function the_function the_fpm in
+
+                the_function
+              with e ->
+                delete_function the_function;
+                raise e
+
+toplevel.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Top-Level parsing and JIT Driver
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+        open Llvm_executionengine
+
+        (* top ::= definition | external | expression | ';' *)
+        let rec main_loop the_fpm the_execution_engine stream =
+          match Stream.peek stream with
+          | None -> ()
+
+          (* ignore top-level semicolons. *)
+          | Some (Token.Kwd ';') ->
+              Stream.junk stream;
+              main_loop the_fpm the_execution_engine stream
+
+          | Some token ->
+              begin
+                try match token with
+                | Token.Def ->
+                    let e = Parser.parse_definition stream in
+                    print_endline "parsed a function definition.";
+                    dump_value (Codegen.codegen_func the_fpm e);
+                | Token.Extern ->
+                    let e = Parser.parse_extern stream in
+                    print_endline "parsed an extern.";
+                    dump_value (Codegen.codegen_proto e);
+                | _ ->
+                    (* Evaluate a top-level expression into an anonymous function. *)
+                    let e = Parser.parse_toplevel stream in
+                    print_endline "parsed a top-level expr";
+                    let the_function = Codegen.codegen_func the_fpm e in
+                    dump_value the_function;
+
+                    (* JIT the function, returning a function pointer. *)
+                    let result = ExecutionEngine.run_function the_function [||]
+                      the_execution_engine in
+
+                    print_string "Evaluated to ";
+                    print_float (GenericValue.as_float Codegen.double_type result);
+                    print_newline ();
+                with Stream.Error s | Codegen.Error s ->
+                  (* Skip token for error recovery. *)
+                  Stream.junk stream;
+                  print_endline s;
+              end;
+              print_string "ready> "; flush stdout;
+              main_loop the_fpm the_execution_engine stream
+
+toy.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Main driver code.
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+        open Llvm_executionengine
+        open Llvm_target
+        open Llvm_scalar_opts
+
+        let main () =
+          ignore (initialize_native_target ());
+
+          (* Install standard binary operators.
+           * 1 is the lowest precedence. *)
+          Hashtbl.add Parser.binop_precedence '<' 10;
+          Hashtbl.add Parser.binop_precedence '+' 20;
+          Hashtbl.add Parser.binop_precedence '-' 20;
+          Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+
+          (* Prime the first token. *)
+          print_string "ready> "; flush stdout;
+          let stream = Lexer.lex (Stream.of_channel stdin) in
+
+          (* Create the JIT. *)
+          let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+          let the_fpm = PassManager.create_function Codegen.the_module in
+
+          (* Set up the optimizer pipeline.  Start with registering info about how the
+           * target lays out data structures. *)
+          DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+          (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+          add_instruction_combination the_fpm;
+
+          (* reassociate expressions. *)
+          add_reassociation the_fpm;
+
+          (* Eliminate Common SubExpressions. *)
+          add_gvn the_fpm;
+
+          (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+          add_cfg_simplification the_fpm;
+
+          ignore (PassManager.initialize the_fpm);
+
+          (* Run the main "interpreter loop" now. *)
+          Toplevel.main_loop the_fpm the_execution_engine stream;
+
+          (* Print out all the generated code. *)
+          dump_module Codegen.the_module
+        ;;
+
+        main ()
+
+bindings.c
+    .. code-block:: c
+
+        #include <stdio.h>
+
+        /* putchard - putchar that takes a double and returns 0. */
+        extern double putchard(double X) {
+          putchar((char)X);
+          return 0;
+        }
+
+`Next: Extending the language: control flow <OCamlLangImpl5.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl5.html b/docs/tutorial/OCamlLangImpl5.html
deleted file mode 100644
index d25f1dc9bb..0000000000
--- a/docs/tutorial/OCamlLangImpl5.html
+++ /dev/null
@@ -1,1560 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Extending the Language: Control Flow</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <meta name="author" content="Erick Tryzelaar">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Extending the Language: Control Flow</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 5
-  <ol>
-    <li><a href="#intro">Chapter 5 Introduction</a></li>
-    <li><a href="#ifthen">If/Then/Else</a>
-    <ol>
-      <li><a href="#iflexer">Lexer Extensions</a></li>
-      <li><a href="#ifast">AST Extensions</a></li>
-      <li><a href="#ifparser">Parser Extensions</a></li>
-      <li><a href="#ifir">LLVM IR</a></li>
-      <li><a href="#ifcodegen">Code Generation</a></li>
-    </ol>
-    </li>
-    <li><a href="#for">'for' Loop Expression</a>
-    <ol>
-      <li><a href="#forlexer">Lexer Extensions</a></li>
-      <li><a href="#forast">AST Extensions</a></li>
-      <li><a href="#forparser">Parser Extensions</a></li>
-      <li><a href="#forir">LLVM IR</a></li>
-      <li><a href="#forcodegen">Code Generation</a></li>
-    </ol>
-    </li>
-    <li><a href="#code">Full Code Listing</a></li>
-  </ol>
-</li>
-<li><a href="OCamlLangImpl6.html">Chapter 6</a>: Extending the Language:
-User-defined Operators</li>
-</ul>
-
-<div class="doc_author">
-	<p>
-		Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
-		and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
-	</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 5 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 5 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial.  Parts 1-4 described the implementation of the simple
-Kaleidoscope language and included support for generating LLVM IR, followed by
-optimizations and a JIT compiler.  Unfortunately, as presented, Kaleidoscope is
-mostly useless: it has no control flow other than call and return.  This means
-that you can't have conditional branches in the code, significantly limiting its
-power.  In this episode of "build that compiler", we'll extend Kaleidoscope to
-have an if/then/else expression plus a simple 'for' loop.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="ifthen">If/Then/Else</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Extending Kaleidoscope to support if/then/else is quite straightforward.  It
-basically requires adding lexer support for this "new" concept to the lexer,
-parser, AST, and LLVM code emitter.  This example is nice, because it shows how
-easy it is to "grow" a language over time, incrementally extending it as new
-ideas are discovered.</p>
-
-<p>Before we get going on "how" we add this extension, lets talk about "what" we
-want.  The basic idea is that we want to be able to write this sort of thing:
-</p>
-
-<div class="doc_code">
-<pre>
-def fib(x)
-  if x &lt; 3 then
-    1
-  else
-    fib(x-1)+fib(x-2);
-</pre>
-</div>
-
-<p>In Kaleidoscope, every construct is an expression: there are no statements.
-As such, the if/then/else expression needs to return a value like any other.
-Since we're using a mostly functional form, we'll have it evaluate its
-conditional, then return the 'then' or 'else' value based on how the condition
-was resolved.  This is very similar to the C "?:" expression.</p>
-
-<p>The semantics of the if/then/else expression is that it evaluates the
-condition to a boolean equality value: 0.0 is considered to be false and
-everything else is considered to be true.
-If the condition is true, the first subexpression is evaluated and returned, if
-the condition is false, the second subexpression is evaluated and returned.
-Since Kaleidoscope allows side-effects, this behavior is important to nail down.
-</p>
-
-<p>Now that we know what we "want", lets break this down into its constituent
-pieces.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="iflexer">Lexer Extensions for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-
-<div>
-
-<p>The lexer extensions are straightforward.  First we add new variants
-for the relevant tokens:</p>
-
-<div class="doc_code">
-<pre>
-  (* control *)
-  | If | Then | Else | For | In
-</pre>
-</div>
-
-<p>Once we have that, we recognize the new keywords in the lexer. This is pretty simple
-stuff:</p>
-
-<div class="doc_code">
-<pre>
-      ...
-      match Buffer.contents buffer with
-      | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
-      | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
-      | "if" -&gt; [&lt; 'Token.If; stream &gt;]
-      | "then" -&gt; [&lt; 'Token.Then; stream &gt;]
-      | "else" -&gt; [&lt; 'Token.Else; stream &gt;]
-      | "for" -&gt; [&lt; 'Token.For; stream &gt;]
-      | "in" -&gt; [&lt; 'Token.In; stream &gt;]
-      | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifast">AST Extensions for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>To represent the new expression we add a new AST variant for it:</p>
-
-<div class="doc_code">
-<pre>
-type expr =
-  ...
-  (* variant for if/then/else. *)
-  | If of expr * expr * expr
-</pre>
-</div>
-
-<p>The AST variant just has pointers to the various subexpressions.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifparser">Parser Extensions for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Now that we have the relevant tokens coming from the lexer and we have the
-AST node to build, our parsing logic is relatively straightforward.  First we
-define a new parsing function:</p>
-
-<div class="doc_code">
-<pre>
-let rec parse_primary = parser
-  ...
-  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
-  | [&lt; 'Token.If; c=parse_expr;
-       'Token.Then ?? "expected 'then'"; t=parse_expr;
-       'Token.Else ?? "expected 'else'"; e=parse_expr &gt;] -&gt;
-      Ast.If (c, t, e)
-</pre>
-</div>
-
-<p>Next we hook it up as a primary expression:</p>
-
-<div class="doc_code">
-<pre>
-let rec parse_primary = parser
-  ...
-  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
-  | [&lt; 'Token.If; c=parse_expr;
-       'Token.Then ?? "expected 'then'"; t=parse_expr;
-       'Token.Else ?? "expected 'else'"; e=parse_expr &gt;] -&gt;
-      Ast.If (c, t, e)
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifir">LLVM IR for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Now that we have it parsing and building the AST, the final piece is adding
-LLVM code generation support.  This is the most interesting part of the
-if/then/else example, because this is where it starts to introduce new concepts.
-All of the code above has been thoroughly described in previous chapters.
-</p>
-
-<p>To motivate the code we want to produce, lets take a look at a simple
-example.  Consider:</p>
-
-<div class="doc_code">
-<pre>
-extern foo();
-extern bar();
-def baz(x) if x then foo() else bar();
-</pre>
-</div>
-
-<p>If you disable optimizations, the code you'll (soon) get from Kaleidoscope
-looks like this:</p>
-
-<div class="doc_code">
-<pre>
-declare double @foo()
-
-declare double @bar()
-
-define double @baz(double %x) {
-entry:
-  %ifcond = fcmp one double %x, 0.000000e+00
-  br i1 %ifcond, label %then, label %else
-
-then:    ; preds = %entry
-  %calltmp = call double @foo()
-  br label %ifcont
-
-else:    ; preds = %entry
-  %calltmp1 = call double @bar()
-  br label %ifcont
-
-ifcont:    ; preds = %else, %then
-  %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
-  ret double %iftmp
-}
-</pre>
-</div>
-
-<p>To visualize the control flow graph, you can use a nifty feature of the LLVM
-'<a href="http://llvm.org/cmds/opt.html">opt</a>' tool.  If you put this LLVM IR
-into "t.ll" and run "<tt>llvm-as &lt; t.ll | opt -analyze -view-cfg</tt>", <a
-href="../ProgrammersManual.html#ViewGraph">a window will pop up</a> and you'll
-see this graph:</p>
-
-<div style="text-align: center"><img src="LangImpl5-cfg.png" alt="Example CFG" width="423"
-height="315"></div>
-
-<p>Another way to get this is to call "<tt>Llvm_analysis.view_function_cfg
-f</tt>" or "<tt>Llvm_analysis.view_function_cfg_only f</tt>" (where <tt>f</tt>
-is a "<tt>Function</tt>") either by inserting actual calls into the code and
-recompiling or by calling these in the debugger.  LLVM has many nice features
-for visualizing various graphs.</p>
-
-<p>Getting back to the generated code, it is fairly simple: the entry block
-evaluates the conditional expression ("x" in our case here) and compares the
-result to 0.0 with the "<tt><a href="../LangRef.html#i_fcmp">fcmp</a> one</tt>"
-instruction ('one' is "Ordered and Not Equal").  Based on the result of this
-expression, the code jumps to either the "then" or "else" blocks, which contain
-the expressions for the true/false cases.</p>
-
-<p>Once the then/else blocks are finished executing, they both branch back to the
-'ifcont' block to execute the code that happens after the if/then/else.  In this
-case the only thing left to do is to return to the caller of the function.  The
-question then becomes: how does the code know which expression to return?</p>
-
-<p>The answer to this question involves an important SSA operation: the
-<a href="http://en.wikipedia.org/wiki/Static_single_assignment_form">Phi
-operation</a>.  If you're not familiar with SSA, <a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">the wikipedia
-article</a> is a good introduction and there are various other introductions to
-it available on your favorite search engine.  The short version is that
-"execution" of the Phi operation requires "remembering" which block control came
-from.  The Phi operation takes on the value corresponding to the input control
-block.  In this case, if control comes in from the "then" block, it gets the
-value of "calltmp".  If control comes from the "else" block, it gets the value
-of "calltmp1".</p>
-
-<p>At this point, you are probably starting to think "Oh no! This means my
-simple and elegant front-end will have to start generating SSA form in order to
-use LLVM!".  Fortunately, this is not the case, and we strongly advise
-<em>not</em> implementing an SSA construction algorithm in your front-end
-unless there is an amazingly good reason to do so.  In practice, there are two
-sorts of values that float around in code written for your average imperative
-programming language that might need Phi nodes:</p>
-
-<ol>
-<li>Code that involves user variables: <tt>x = 1; x = x + 1; </tt></li>
-<li>Values that are implicit in the structure of your AST, such as the Phi node
-in this case.</li>
-</ol>
-
-<p>In <a href="OCamlLangImpl7.html">Chapter 7</a> of this tutorial ("mutable
-variables"), we'll talk about #1
-in depth.  For now, just believe me that you don't need SSA construction to
-handle this case.  For #2, you have the choice of using the techniques that we will
-describe for #1, or you can insert Phi nodes directly, if convenient.  In this
-case, it is really really easy to generate the Phi node, so we choose to do it
-directly.</p>
-
-<p>Okay, enough of the motivation and overview, lets generate code!</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="ifcodegen">Code Generation for If/Then/Else</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>In order to generate code for this, we implement the <tt>Codegen</tt> method
-for <tt>IfExprAST</tt>:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
-  ...
-  | Ast.If (cond, then_, else_) -&gt;
-      let cond = codegen_expr cond in
-
-      (* Convert condition to a bool by comparing equal to 0.0 *)
-      let zero = const_float double_type 0.0 in
-      let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
-</pre>
-</div>
-
-<p>This code is straightforward and similar to what we saw before.  We emit the
-expression for the condition, then compare that value to zero to get a truth
-value as a 1-bit (bool) value.</p>
-
-<div class="doc_code">
-<pre>
-      (* Grab the first block so that we might later add the conditional branch
-       * to it at the end of the function. *)
-      let start_bb = insertion_block builder in
-      let the_function = block_parent start_bb in
-
-      let then_bb = append_block context "then" the_function in
-      position_at_end then_bb builder;
-</pre>
-</div>
-
-<p>
-As opposed to the <a href="LangImpl5.html">C++ tutorial</a>, we have to build
-our basic blocks bottom up since we can't have dangling BasicBlocks.  We start
-off by saving a pointer to the first block (which might not be the entry
-block), which we'll need to build a conditional branch later.  We do this by
-asking the <tt>builder</tt> for the current BasicBlock.  The fourth line
-gets the current Function object that is being built.  It gets this by the
-<tt>start_bb</tt> for its "parent" (the function it is currently embedded
-into).</p>
-
-<p>Once it has that, it creates one block.  It is automatically appended into
-the function's list of blocks.</p>
-
-<div class="doc_code">
-<pre>
-      (* Emit 'then' value. *)
-      position_at_end then_bb builder;
-      let then_val = codegen_expr then_ in
-
-      (* Codegen of 'then' can change the current block, update then_bb for the
-       * phi. We create a new name because one is used for the phi node, and the
-       * other is used for the conditional branch. *)
-      let new_then_bb = insertion_block builder in
-</pre>
-</div>
-
-<p>We move the builder to start inserting into the "then" block.  Strictly
-speaking, this call moves the insertion point to be at the end of the specified
-block.  However, since the "then" block is empty, it also starts out by
-inserting at the beginning of the block.  :)</p>
-
-<p>Once the insertion point is set, we recursively codegen the "then" expression
-from the AST.</p>
-
-<p>The final line here is quite subtle, but is very important.  The basic issue
-is that when we create the Phi node in the merge block, we need to set up the
-block/value pairs that indicate how the Phi will work.  Importantly, the Phi
-node expects to have an entry for each predecessor of the block in the CFG.  Why
-then, are we getting the current block when we just set it to ThenBB 5 lines
-above?  The problem is that the "Then" expression may actually itself change the
-block that the Builder is emitting into if, for example, it contains a nested
-"if/then/else" expression.  Because calling Codegen recursively could
-arbitrarily change the notion of the current block, we are required to get an
-up-to-date value for code that will set up the Phi node.</p>
-
-<div class="doc_code">
-<pre>
-      (* Emit 'else' value. *)
-      let else_bb = append_block context "else" the_function in
-      position_at_end else_bb builder;
-      let else_val = codegen_expr else_ in
-
-      (* Codegen of 'else' can change the current block, update else_bb for the
-       * phi. *)
-      let new_else_bb = insertion_block builder in
-</pre>
-</div>
-
-<p>Code generation for the 'else' block is basically identical to codegen for
-the 'then' block.</p>
-
-<div class="doc_code">
-<pre>
-      (* Emit merge block. *)
-      let merge_bb = append_block context "ifcont" the_function in
-      position_at_end merge_bb builder;
-      let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
-      let phi = build_phi incoming "iftmp" builder in
-</pre>
-</div>
-
-<p>The first two lines here are now familiar: the first adds the "merge" block
-to the Function object.  The second block changes the insertion point so that
-newly created code will go into the "merge" block.  Once that is done, we need
-to create the PHI node and set up the block/value pairs for the PHI.</p>
-
-<div class="doc_code">
-<pre>
-      (* Return to the start block to add the conditional branch. *)
-      position_at_end start_bb builder;
-      ignore (build_cond_br cond_val then_bb else_bb builder);
-</pre>
-</div>
-
-<p>Once the blocks are created, we can emit the conditional branch that chooses
-between them.  Note that creating new blocks does not implicitly affect the
-IRBuilder, so it is still inserting into the block that the condition
-went into.  This is why we needed to save the "start" block.</p>
-
-<div class="doc_code">
-<pre>
-      (* Set a unconditional branch at the end of the 'then' block and the
-       * 'else' block to the 'merge' block. *)
-      position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
-      position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
-
-      (* Finally, set the builder to the end of the merge block. *)
-      position_at_end merge_bb builder;
-
-      phi
-</pre>
-</div>
-
-<p>To finish off the blocks, we create an unconditional branch
-to the merge block.  One interesting (and very important) aspect of the LLVM IR
-is that it <a href="../LangRef.html#functionstructure">requires all basic blocks
-to be "terminated"</a> with a <a href="../LangRef.html#terminators">control flow
-instruction</a> such as return or branch.  This means that all control flow,
-<em>including fall throughs</em> must be made explicit in the LLVM IR.  If you
-violate this rule, the verifier will emit an error.
-
-<p>Finally, the CodeGen function returns the phi node as the value computed by
-the if/then/else expression.  In our example above, this returned value will
-feed into the code for the top-level function, which will create the return
-instruction.</p>
-
-<p>Overall, we now have the ability to execute conditional code in
-Kaleidoscope.  With this extension, Kaleidoscope is a fairly complete language
-that can calculate a wide variety of numeric functions.  Next up we'll add
-another useful expression that is familiar from non-functional languages...</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="for">'for' Loop Expression</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we know how to add basic control flow constructs to the language,
-we have the tools to add more powerful things.  Lets add something more
-aggressive, a 'for' expression:</p>
-
-<div class="doc_code">
-<pre>
- extern putchard(char);
- def printstar(n)
-   for i = 1, i &lt; n, 1.0 in
-     putchard(42);  # ascii 42 = '*'
-
- # print 100 '*' characters
- printstar(100);
-</pre>
-</div>
-
-<p>This expression defines a new variable ("i" in this case) which iterates from
-a starting value, while the condition ("i &lt; n" in this case) is true,
-incrementing by an optional step value ("1.0" in this case).  If the step value
-is omitted, it defaults to 1.0.  While the loop is true, it executes its
-body expression.  Because we don't have anything better to return, we'll just
-define the loop as always returning 0.0.  In the future when we have mutable
-variables, it will get more useful.</p>
-
-<p>As before, lets talk about the changes that we need to Kaleidoscope to
-support this.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="forlexer">Lexer Extensions for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The lexer extensions are the same sort of thing as for if/then/else:</p>
-
-<div class="doc_code">
-<pre>
-  ... in Token.token ...
-  (* control *)
-  | If | Then | Else
-  <b>| For | In</b>
-
-  ... in Lexer.lex_ident...
-      match Buffer.contents buffer with
-      | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
-      | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
-      | "if" -&gt; [&lt; 'Token.If; stream &gt;]
-      | "then" -&gt; [&lt; 'Token.Then; stream &gt;]
-      | "else" -&gt; [&lt; 'Token.Else; stream &gt;]
-      <b>| "for" -&gt; [&lt; 'Token.For; stream &gt;]
-      | "in" -&gt; [&lt; 'Token.In; stream &gt;]</b>
-      | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forast">AST Extensions for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The AST variant is just as simple.  It basically boils down to capturing
-the variable name and the constituent expressions in the node.</p>
-
-<div class="doc_code">
-<pre>
-type expr =
-  ...
-  (* variant for for/in. *)
-  | For of string * expr * expr * expr option * expr
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forparser">Parser Extensions for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The parser code is also fairly standard.  The only interesting thing here is
-handling of the optional step value.  The parser code handles it by checking to
-see if the second comma is present.  If not, it sets the step value to null in
-the AST node:</p>
-
-<div class="doc_code">
-<pre>
-let rec parse_primary = parser
-  ...
-  (* forexpr
-        ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
-  | [&lt; 'Token.For;
-       'Token.Ident id ?? "expected identifier after for";
-       'Token.Kwd '=' ?? "expected '=' after for";
-       stream &gt;] -&gt;
-      begin parser
-        | [&lt;
-             start=parse_expr;
-             'Token.Kwd ',' ?? "expected ',' after for";
-             end_=parse_expr;
-             stream &gt;] -&gt;
-            let step =
-              begin parser
-              | [&lt; 'Token.Kwd ','; step=parse_expr &gt;] -&gt; Some step
-              | [&lt; &gt;] -&gt; None
-              end stream
-            in
-            begin parser
-            | [&lt; 'Token.In; body=parse_expr &gt;] -&gt;
-                Ast.For (id, start, end_, step, body)
-            | [&lt; &gt;] -&gt;
-                raise (Stream.Error "expected 'in' after for")
-            end stream
-        | [&lt; &gt;] -&gt;
-            raise (Stream.Error "expected '=' after for")
-      end stream
-</pre>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forir">LLVM IR for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Now we get to the good part: the LLVM IR we want to generate for this thing.
-With the simple example above, we get this LLVM IR (note that this dump is
-generated with optimizations disabled for clarity):
-</p>
-
-<div class="doc_code">
-<pre>
-declare double @putchard(double)
-
-define double @printstar(double %n) {
-entry:
-        ; initial value = 1.0 (inlined into phi)
-  br label %loop
-
-loop:    ; preds = %loop, %entry
-  %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
-        ; body
-  %calltmp = call double @putchard(double 4.200000e+01)
-        ; increment
-  %nextvar = fadd double %i, 1.000000e+00
-
-        ; termination test
-  %cmptmp = fcmp ult double %i, %n
-  %booltmp = uitofp i1 %cmptmp to double
-  %loopcond = fcmp one double %booltmp, 0.000000e+00
-  br i1 %loopcond, label %loop, label %afterloop
-
-afterloop:    ; preds = %loop
-        ; loop always returns 0.0
-  ret double 0.000000e+00
-}
-</pre>
-</div>
-
-<p>This loop contains all the same constructs we saw before: a phi node, several
-expressions, and some basic blocks.  Lets see how this fits together.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="forcodegen">Code Generation for the 'for' Loop</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>The first part of Codegen is very simple: we just output the start expression
-for the loop value:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
-  ...
-  | Ast.For (var_name, start, end_, step, body) -&gt;
-      (* Emit the start code first, without 'variable' in scope. *)
-      let start_val = codegen_expr start in
-</pre>
-</div>
-
-<p>With this out of the way, the next step is to set up the LLVM basic block
-for the start of the loop body.  In the case above, the whole loop body is one
-block, but remember that the body code itself could consist of multiple blocks
-(e.g. if it contains an if/then/else or a for/in expression).</p>
-
-<div class="doc_code">
-<pre>
-      (* Make the new basic block for the loop header, inserting after current
-       * block. *)
-      let preheader_bb = insertion_block builder in
-      let the_function = block_parent preheader_bb in
-      let loop_bb = append_block context "loop" the_function in
-
-      (* Insert an explicit fall through from the current block to the
-       * loop_bb. *)
-      ignore (build_br loop_bb builder);
-</pre>
-</div>
-
-<p>This code is similar to what we saw for if/then/else.  Because we will need
-it to create the Phi node, we remember the block that falls through into the
-loop.  Once we have that, we create the actual block that starts the loop and
-create an unconditional branch for the fall-through between the two blocks.</p>
-
-<div class="doc_code">
-<pre>
-      (* Start insertion in loop_bb. *)
-      position_at_end loop_bb builder;
-
-      (* Start the PHI node with an entry for start. *)
-      let variable = build_phi [(start_val, preheader_bb)] var_name builder in
-</pre>
-</div>
-
-<p>Now that the "preheader" for the loop is set up, we switch to emitting code
-for the loop body.  To begin with, we move the insertion point and create the
-PHI node for the loop induction variable.  Since we already know the incoming
-value for the starting value, we add it to the Phi node.  Note that the Phi will
-eventually get a second value for the backedge, but we can't set it up yet
-(because it doesn't exist!).</p>
-
-<div class="doc_code">
-<pre>
-      (* Within the loop, the variable is defined equal to the PHI node. If it
-       * shadows an existing variable, we have to restore it, so save it
-       * now. *)
-      let old_val =
-        try Some (Hashtbl.find named_values var_name) with Not_found -&gt; None
-      in
-      Hashtbl.add named_values var_name variable;
-
-      (* Emit the body of the loop.  This, like any other expr, can change the
-       * current BB.  Note that we ignore the value computed by the body, but
-       * don't allow an error *)
-      ignore (codegen_expr body);
-</pre>
-</div>
-
-<p>Now the code starts to get more interesting.  Our 'for' loop introduces a new
-variable to the symbol table.  This means that our symbol table can now contain
-either function arguments or loop variables.  To handle this, before we codegen
-the body of the loop, we add the loop variable as the current value for its
-name.  Note that it is possible that there is a variable of the same name in the
-outer scope.  It would be easy to make this an error (emit an error and return
-null if there is already an entry for VarName) but we choose to allow shadowing
-of variables.  In order to handle this correctly, we remember the Value that
-we are potentially shadowing in <tt>old_val</tt> (which will be None if there is
-no shadowed variable).</p>
-
-<p>Once the loop variable is set into the symbol table, the code recursively
-codegen's the body.  This allows the body to use the loop variable: any
-references to it will naturally find it in the symbol table.</p>
-
-<div class="doc_code">
-<pre>
-      (* Emit the step value. *)
-      let step_val =
-        match step with
-        | Some step -&gt; codegen_expr step
-        (* If not specified, use 1.0. *)
-        | None -&gt; const_float double_type 1.0
-      in
-
-      let next_var = build_add variable step_val "nextvar" builder in
-</pre>
-</div>
-
-<p>Now that the body is emitted, we compute the next value of the iteration
-variable by adding the step value, or 1.0 if it isn't present.
-'<tt>next_var</tt>' will be the value of the loop variable on the next iteration
-of the loop.</p>
-
-<div class="doc_code">
-<pre>
-      (* Compute the end condition. *)
-      let end_cond = codegen_expr end_ in
-
-      (* Convert condition to a bool by comparing equal to 0.0. *)
-      let zero = const_float double_type 0.0 in
-      let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
-</pre>
-</div>
-
-<p>Finally, we evaluate the exit value of the loop, to determine whether the
-loop should exit.  This mirrors the condition evaluation for the if/then/else
-statement.</p>
-
-<div class="doc_code">
-<pre>
-      (* Create the "after loop" block and insert it. *)
-      let loop_end_bb = insertion_block builder in
-      let after_bb = append_block context "afterloop" the_function in
-
-      (* Insert the conditional branch into the end of loop_end_bb. *)
-      ignore (build_cond_br end_cond loop_bb after_bb builder);
-
-      (* Any new code will be inserted in after_bb. *)
-      position_at_end after_bb builder;
-</pre>
-</div>
-
-<p>With the code for the body of the loop complete, we just need to finish up
-the control flow for it.  This code remembers the end block (for the phi node), then creates the block for the loop exit ("afterloop").  Based on the value of the
-exit condition, it creates a conditional branch that chooses between executing
-the loop again and exiting the loop.  Any future code is emitted in the
-"afterloop" block, so it sets the insertion position to it.</p>
-
-<div class="doc_code">
-<pre>
-      (* Add a new entry to the PHI node for the backedge. *)
-      add_incoming (next_var, loop_end_bb) variable;
-
-      (* Restore the unshadowed variable. *)
-      begin match old_val with
-      | Some old_val -&gt; Hashtbl.add named_values var_name old_val
-      | None -&gt; ()
-      end;
-
-      (* for expr always returns 0.0. *)
-      const_null double_type
-</pre>
-</div>
-
-<p>The final code handles various cleanups: now that we have the
-"<tt>next_var</tt>" value, we can add the incoming value to the loop PHI node.
-After that, we remove the loop variable from the symbol table, so that it isn't
-in scope after the for loop.  Finally, code generation of the for loop always
-returns 0.0, so that is what we return from <tt>Codegen.codegen_expr</tt>.</p>
-
-<p>With this, we conclude the "adding control flow to Kaleidoscope" chapter of
-the tutorial.  In this chapter we added two control flow constructs, and used
-them to motivate a couple of aspects of the LLVM IR that are important for
-front-end implementors to know.  In the next chapter of our saga, we will get
-a bit crazier and add <a href="OCamlLangImpl6.html">user-defined operators</a>
-to our poor innocent language.</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-if/then/else and for expressions..  To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<dl>
-<dt>_tags:</dt>
-<dd class="doc_code">
-<pre>
-&lt;{lexer,parser}.ml&gt;: use_camlp4, pp(camlp4of)
-&lt;*.{byte,native}&gt;: g++, use_llvm, use_llvm_analysis
-&lt;*.{byte,native}&gt;: use_llvm_executionengine, use_llvm_target
-&lt;*.{byte,native}&gt;: use_llvm_scalar_opts, use_bindings
-</pre>
-</dd>
-
-<dt>myocamlbuild.ml:</dt>
-<dd class="doc_code">
-<pre>
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-ocaml_lib ~extern:true "llvm_executionengine";;
-ocaml_lib ~extern:true "llvm_target";;
-ocaml_lib ~extern:true "llvm_scalar_opts";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
-dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
-</pre>
-</dd>
-
-<dt>token.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
-  (* commands *)
-  | Def | Extern
-
-  (* primary *)
-  | Ident of string | Number of float
-
-  (* unknown *)
-  | Kwd of char
-
-  (* control *)
-  | If | Then | Else
-  | For | In
-</pre>
-</dd>
-
-<dt>lexer.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-  (* number: [0-9.]+ *)
-  | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-  (* Comment until end of line. *)
-  | [&lt; ' ('#'); stream &gt;] -&gt;
-      lex_comment stream
-
-  (* Otherwise, just return the character as its ascii value. *)
-  | [&lt; 'c; stream &gt;] -&gt;
-      [&lt; 'Token.Kwd c; lex stream &gt;]
-
-  (* end of stream. *)
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-
-and lex_number buffer = parser
-  | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-
-and lex_ident buffer = parser
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      match Buffer.contents buffer with
-      | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
-      | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
-      | "if" -&gt; [&lt; 'Token.If; stream &gt;]
-      | "then" -&gt; [&lt; 'Token.Then; stream &gt;]
-      | "else" -&gt; [&lt; 'Token.Else; stream &gt;]
-      | "for" -&gt; [&lt; 'Token.For; stream &gt;]
-      | "in" -&gt; [&lt; 'Token.In; stream &gt;]
-      | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-
-and lex_comment = parser
-  | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
-  | [&lt; 'c; e=lex_comment &gt;] -&gt; e
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</dd>
-
-<dt>ast.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a binary operator. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls. *)
-  | Call of string * expr array
-
-  (* variant for if/then/else. *)
-  | If of expr * expr * expr
-
-  (* variant for for/in. *)
-  | For of string * expr * expr * expr option * expr
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto = Prototype of string * string array
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</dd>
-
-<dt>parser.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr
- *   ::= ifexpr
- *   ::= forexpr *)
-let rec parse_primary = parser
-  (* numberexpr ::= number *)
-  | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [&lt; 'Token.Ident id; stream &gt;] -&gt;
-      let rec parse_args accumulator = parser
-        | [&lt; e=parse_expr; stream &gt;] -&gt;
-            begin parser
-              | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
-              | [&lt; &gt;] -&gt; e :: accumulator
-            end stream
-        | [&lt; &gt;] -&gt; accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [&lt; 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [&lt; &gt;] -&gt; Ast.Variable id
-      in
-      parse_ident id stream
-
-  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
-  | [&lt; 'Token.If; c=parse_expr;
-       'Token.Then ?? "expected 'then'"; t=parse_expr;
-       'Token.Else ?? "expected 'else'"; e=parse_expr &gt;] -&gt;
-      Ast.If (c, t, e)
-
-  (* forexpr
-        ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
-  | [&lt; 'Token.For;
-       'Token.Ident id ?? "expected identifier after for";
-       'Token.Kwd '=' ?? "expected '=' after for";
-       stream &gt;] -&gt;
-      begin parser
-        | [&lt;
-             start=parse_expr;
-             'Token.Kwd ',' ?? "expected ',' after for";
-             end_=parse_expr;
-             stream &gt;] -&gt;
-            let step =
-              begin parser
-              | [&lt; 'Token.Kwd ','; step=parse_expr &gt;] -&gt; Some step
-              | [&lt; &gt;] -&gt; None
-              end stream
-            in
-            begin parser
-            | [&lt; 'Token.In; body=parse_expr &gt;] -&gt;
-                Ast.For (id, start, end_, step, body)
-            | [&lt; &gt;] -&gt;
-                raise (Stream.Error "expected 'in' after for")
-            end stream
-        | [&lt; &gt;] -&gt;
-            raise (Stream.Error "expected '=' after for")
-      end stream
-
-  | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec &lt; expr_prec then lhs else begin
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Parse the primary expression after the binary operator. *)
-        let rhs = parse_primary stream in
-
-        (* Okay, we know this is a binop. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) -&gt;
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec &lt; next_prec
-              then parse_bin_rhs (token_prec + 1) rhs stream
-              else rhs
-          | _ -&gt; rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-  | _ -&gt; lhs
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [&lt; lhs=parse_primary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-
-(* prototype
- *   ::= id '(' id* ')' *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
-    | [&lt; &gt;] -&gt; accumulator
-  in
-
-  parser
-  | [&lt; 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-
-  | [&lt; &gt;] -&gt;
-      raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
-  | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
-      Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
-  | [&lt; e=parse_expr &gt;] -&gt;
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-(*  external ::= 'extern' prototype *)
-let parse_extern = parser
-  | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</dd>
-
-<dt>codegen.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-let rec codegen_expr = function
-  | Ast.Number n -&gt; const_float double_type n
-  | Ast.Variable name -&gt;
-      (try Hashtbl.find named_values name with
-        | Not_found -&gt; raise (Error "unknown variable name"))
-  | Ast.Binary (op, lhs, rhs) -&gt;
-      let lhs_val = codegen_expr lhs in
-      let rhs_val = codegen_expr rhs in
-      begin
-        match op with
-        | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
-        | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
-        | '&lt;' -&gt;
-            (* Convert bool 0/1 to double 0.0 or 1.0 *)
-            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-            build_uitofp i double_type "booltmp" builder
-        | _ -&gt; raise (Error "invalid binary operator")
-      end
-  | Ast.Call (callee, args) -&gt;
-      (* Look up the name in the module table. *)
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -&gt; callee
-        | None -&gt; raise (Error "unknown function referenced")
-      in
-      let params = params callee in
-
-      (* If argument mismatch error. *)
-      if Array.length params == Array.length args then () else
-        raise (Error "incorrect # arguments passed");
-      let args = Array.map codegen_expr args in
-      build_call callee args "calltmp" builder
-  | Ast.If (cond, then_, else_) -&gt;
-      let cond = codegen_expr cond in
-
-      (* Convert condition to a bool by comparing equal to 0.0 *)
-      let zero = const_float double_type 0.0 in
-      let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
-
-      (* Grab the first block so that we might later add the conditional branch
-       * to it at the end of the function. *)
-      let start_bb = insertion_block builder in
-      let the_function = block_parent start_bb in
-
-      let then_bb = append_block context "then" the_function in
-
-      (* Emit 'then' value. *)
-      position_at_end then_bb builder;
-      let then_val = codegen_expr then_ in
-
-      (* Codegen of 'then' can change the current block, update then_bb for the
-       * phi. We create a new name because one is used for the phi node, and the
-       * other is used for the conditional branch. *)
-      let new_then_bb = insertion_block builder in
-
-      (* Emit 'else' value. *)
-      let else_bb = append_block context "else" the_function in
-      position_at_end else_bb builder;
-      let else_val = codegen_expr else_ in
-
-      (* Codegen of 'else' can change the current block, update else_bb for the
-       * phi. *)
-      let new_else_bb = insertion_block builder in
-
-      (* Emit merge block. *)
-      let merge_bb = append_block context "ifcont" the_function in
-      position_at_end merge_bb builder;
-      let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
-      let phi = build_phi incoming "iftmp" builder in
-
-      (* Return to the start block to add the conditional branch. *)
-      position_at_end start_bb builder;
-      ignore (build_cond_br cond_val then_bb else_bb builder);
-
-      (* Set a unconditional branch at the end of the 'then' block and the
-       * 'else' block to the 'merge' block. *)
-      position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
-      position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
-
-      (* Finally, set the builder to the end of the merge block. *)
-      position_at_end merge_bb builder;
-
-      phi
-  | Ast.For (var_name, start, end_, step, body) -&gt;
-      (* Emit the start code first, without 'variable' in scope. *)
-      let start_val = codegen_expr start in
-
-      (* Make the new basic block for the loop header, inserting after current
-       * block. *)
-      let preheader_bb = insertion_block builder in
-      let the_function = block_parent preheader_bb in
-      let loop_bb = append_block context "loop" the_function in
-
-      (* Insert an explicit fall through from the current block to the
-       * loop_bb. *)
-      ignore (build_br loop_bb builder);
-
-      (* Start insertion in loop_bb. *)
-      position_at_end loop_bb builder;
-
-      (* Start the PHI node with an entry for start. *)
-      let variable = build_phi [(start_val, preheader_bb)] var_name builder in
-
-      (* Within the loop, the variable is defined equal to the PHI node. If it
-       * shadows an existing variable, we have to restore it, so save it
-       * now. *)
-      let old_val =
-        try Some (Hashtbl.find named_values var_name) with Not_found -&gt; None
-      in
-      Hashtbl.add named_values var_name variable;
-
-      (* Emit the body of the loop.  This, like any other expr, can change the
-       * current BB.  Note that we ignore the value computed by the body, but
-       * don't allow an error *)
-      ignore (codegen_expr body);
-
-      (* Emit the step value. *)
-      let step_val =
-        match step with
-        | Some step -&gt; codegen_expr step
-        (* If not specified, use 1.0. *)
-        | None -&gt; const_float double_type 1.0
-      in
-
-      let next_var = build_add variable step_val "nextvar" builder in
-
-      (* Compute the end condition. *)
-      let end_cond = codegen_expr end_ in
-
-      (* Convert condition to a bool by comparing equal to 0.0. *)
-      let zero = const_float double_type 0.0 in
-      let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
-
-      (* Create the "after loop" block and insert it. *)
-      let loop_end_bb = insertion_block builder in
-      let after_bb = append_block context "afterloop" the_function in
-
-      (* Insert the conditional branch into the end of loop_end_bb. *)
-      ignore (build_cond_br end_cond loop_bb after_bb builder);
-
-      (* Any new code will be inserted in after_bb. *)
-      position_at_end after_bb builder;
-
-      (* Add a new entry to the PHI node for the backedge. *)
-      add_incoming (next_var, loop_end_bb) variable;
-
-      (* Restore the unshadowed variable. *)
-      begin match old_val with
-      | Some old_val -&gt; Hashtbl.add named_values var_name old_val
-      | None -&gt; ()
-      end;
-
-      (* for expr always returns 0.0. *)
-      const_null double_type
-
-let codegen_proto = function
-  | Ast.Prototype (name, args) -&gt;
-      (* Make the function type: double(double,double) etc. *)
-      let doubles = Array.make (Array.length args) double_type in
-      let ft = function_type double_type doubles in
-      let f =
-        match lookup_function name the_module with
-        | None -&gt; declare_function name ft the_module
-
-        (* If 'f' conflicted, there was already something named 'name'. If it
-         * has a body, don't allow redefinition or reextern. *)
-        | Some f -&gt;
-            (* If 'f' already has a body, reject this. *)
-            if block_begin f &lt;&gt; At_end f then
-              raise (Error "redefinition of function");
-
-            (* If 'f' took a different number of arguments, reject. *)
-            if element_type (type_of f) &lt;&gt; ft then
-              raise (Error "redefinition of function with different # args");
-            f
-      in
-
-      (* Set names for all arguments. *)
-      Array.iteri (fun i a -&gt;
-        let n = args.(i) in
-        set_value_name n a;
-        Hashtbl.add named_values n a;
-      ) (params f);
-      f
-
-let codegen_func the_fpm = function
-  | Ast.Function (proto, body) -&gt;
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-
-      try
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        (* Optimize the function. *)
-        let _ = PassManager.run_function the_function the_fpm in
-
-        the_function
-      with e -&gt;
-        delete_function the_function;
-        raise e
-</pre>
-</dd>
-
-<dt>toplevel.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop the_fpm the_execution_engine stream =
-  match Stream.peek stream with
-  | None -&gt; ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') -&gt;
-      Stream.junk stream;
-      main_loop the_fpm the_execution_engine stream
-
-  | Some token -&gt;
-      begin
-        try match token with
-        | Token.Def -&gt;
-            let e = Parser.parse_definition stream in
-            print_endline "parsed a function definition.";
-            dump_value (Codegen.codegen_func the_fpm e);
-        | Token.Extern -&gt;
-            let e = Parser.parse_extern stream in
-            print_endline "parsed an extern.";
-            dump_value (Codegen.codegen_proto e);
-        | _ -&gt;
-            (* Evaluate a top-level expression into an anonymous function. *)
-            let e = Parser.parse_toplevel stream in
-            print_endline "parsed a top-level expr";
-            let the_function = Codegen.codegen_func the_fpm e in
-            dump_value the_function;
-
-            (* JIT the function, returning a function pointer. *)
-            let result = ExecutionEngine.run_function the_function [||]
-              the_execution_engine in
-
-            print_string "Evaluated to ";
-            print_float (GenericValue.as_float Codegen.double_type result);
-            print_newline ();
-        with Stream.Error s | Codegen.Error s -&gt;
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready&gt; "; flush stdout;
-      main_loop the_fpm the_execution_engine stream
-</pre>
-</dd>
-
-<dt>toy.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-open Llvm_scalar_opts
-
-let main () =
-  ignore (initialize_native_target ());
-
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '&lt;' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-
-  (* Prime the first token. *)
-  print_string "ready&gt; "; flush stdout;
-  let stream = Lexer.lex (Stream.of_channel stdin) in
-
-  (* Create the JIT. *)
-  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
-  let the_fpm = PassManager.create_function Codegen.the_module in
-
-  (* Set up the optimizer pipeline.  Start with registering info about how the
-   * target lays out data structures. *)
-  DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
-  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
-  add_instruction_combination the_fpm;
-
-  (* reassociate expressions. *)
-  add_reassociation the_fpm;
-
-  (* Eliminate Common SubExpressions. *)
-  add_gvn the_fpm;
-
-  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
-  add_cfg_simplification the_fpm;
-
-  ignore (PassManager.initialize the_fpm);
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop the_fpm the_execution_engine stream;
-
-  (* Print out all the generated code. *)
-  dump_module Codegen.the_module
-;;
-
-main ()
-</pre>
-</dd>
-
-<dt>bindings.c</dt>
-<dd class="doc_code">
-<pre>
-#include &lt;stdio.h&gt;
-
-/* putchard - putchar that takes a double and returns 0. */
-extern double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-</pre>
-</dd>
-</dl>
-
-<a href="OCamlLangImpl6.html">Next: Extending the language: user-defined
-operators</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl5.rst b/docs/tutorial/OCamlLangImpl5.rst
new file mode 100644
index 0000000000..203fb6f73b
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl5.rst
@@ -0,0 +1,1365 @@
+==================================================
+Kaleidoscope: Extending the Language: Control Flow
+==================================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_ and `Erick
+Tryzelaar <mailto:idadesub@users.sourceforge.net>`_
+
+Chapter 5 Introduction
+======================
+
+Welcome to Chapter 5 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. Parts 1-4 described the implementation of
+the simple Kaleidoscope language and included support for generating
+LLVM IR, followed by optimizations and a JIT compiler. Unfortunately, as
+presented, Kaleidoscope is mostly useless: it has no control flow other
+than call and return. This means that you can't have conditional
+branches in the code, significantly limiting its power. In this episode
+of "build that compiler", we'll extend Kaleidoscope to have an
+if/then/else expression plus a simple 'for' loop.
+
+If/Then/Else
+============
+
+Extending Kaleidoscope to support if/then/else is quite straightforward.
+It basically requires adding lexer support for this "new" concept to the
+lexer, parser, AST, and LLVM code emitter. This example is nice, because
+it shows how easy it is to "grow" a language over time, incrementally
+extending it as new ideas are discovered.
+
+Before we get going on "how" we add this extension, lets talk about
+"what" we want. The basic idea is that we want to be able to write this
+sort of thing:
+
+::
+
+    def fib(x)
+      if x < 3 then
+        1
+      else
+        fib(x-1)+fib(x-2);
+
+In Kaleidoscope, every construct is an expression: there are no
+statements. As such, the if/then/else expression needs to return a value
+like any other. Since we're using a mostly functional form, we'll have
+it evaluate its conditional, then return the 'then' or 'else' value
+based on how the condition was resolved. This is very similar to the C
+"?:" expression.
+
+The semantics of the if/then/else expression is that it evaluates the
+condition to a boolean equality value: 0.0 is considered to be false and
+everything else is considered to be true. If the condition is true, the
+first subexpression is evaluated and returned, if the condition is
+false, the second subexpression is evaluated and returned. Since
+Kaleidoscope allows side-effects, this behavior is important to nail
+down.
+
+Now that we know what we "want", lets break this down into its
+constituent pieces.
+
+Lexer Extensions for If/Then/Else
+---------------------------------
+
+The lexer extensions are straightforward. First we add new variants for
+the relevant tokens:
+
+.. code-block:: ocaml
+
+      (* control *)
+      | If | Then | Else | For | In
+
+Once we have that, we recognize the new keywords in the lexer. This is
+pretty simple stuff:
+
+.. code-block:: ocaml
+
+          ...
+          match Buffer.contents buffer with
+          | "def" -> [< 'Token.Def; stream >]
+          | "extern" -> [< 'Token.Extern; stream >]
+          | "if" -> [< 'Token.If; stream >]
+          | "then" -> [< 'Token.Then; stream >]
+          | "else" -> [< 'Token.Else; stream >]
+          | "for" -> [< 'Token.For; stream >]
+          | "in" -> [< 'Token.In; stream >]
+          | id -> [< 'Token.Ident id; stream >]
+
+AST Extensions for If/Then/Else
+-------------------------------
+
+To represent the new expression we add a new AST variant for it:
+
+.. code-block:: ocaml
+
+    type expr =
+      ...
+      (* variant for if/then/else. *)
+      | If of expr * expr * expr
+
+The AST variant just has pointers to the various subexpressions.
+
+Parser Extensions for If/Then/Else
+----------------------------------
+
+Now that we have the relevant tokens coming from the lexer and we have
+the AST node to build, our parsing logic is relatively straightforward.
+First we define a new parsing function:
+
+.. code-block:: ocaml
+
+    let rec parse_primary = parser
+      ...
+      (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+      | [< 'Token.If; c=parse_expr;
+           'Token.Then ?? "expected 'then'"; t=parse_expr;
+           'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+          Ast.If (c, t, e)
+
+Next we hook it up as a primary expression:
+
+.. code-block:: ocaml
+
+    let rec parse_primary = parser
+      ...
+      (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+      | [< 'Token.If; c=parse_expr;
+           'Token.Then ?? "expected 'then'"; t=parse_expr;
+           'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+          Ast.If (c, t, e)
+
+LLVM IR for If/Then/Else
+------------------------
+
+Now that we have it parsing and building the AST, the final piece is
+adding LLVM code generation support. This is the most interesting part
+of the if/then/else example, because this is where it starts to
+introduce new concepts. All of the code above has been thoroughly
+described in previous chapters.
+
+To motivate the code we want to produce, lets take a look at a simple
+example. Consider:
+
+::
+
+    extern foo();
+    extern bar();
+    def baz(x) if x then foo() else bar();
+
+If you disable optimizations, the code you'll (soon) get from
+Kaleidoscope looks like this:
+
+.. code-block:: llvm
+
+    declare double @foo()
+
+    declare double @bar()
+
+    define double @baz(double %x) {
+    entry:
+      %ifcond = fcmp one double %x, 0.000000e+00
+      br i1 %ifcond, label %then, label %else
+
+    then:    ; preds = %entry
+      %calltmp = call double @foo()
+      br label %ifcont
+
+    else:    ; preds = %entry
+      %calltmp1 = call double @bar()
+      br label %ifcont
+
+    ifcont:    ; preds = %else, %then
+      %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
+      ret double %iftmp
+    }
+
+To visualize the control flow graph, you can use a nifty feature of the
+LLVM '`opt <http://llvm.org/cmds/opt.html>`_' tool. If you put this LLVM
+IR into "t.ll" and run "``llvm-as < t.ll | opt -analyze -view-cfg``", `a
+window will pop up <../ProgrammersManual.html#ViewGraph>`_ and you'll
+see this graph:
+
+.. figure:: LangImpl5-cfg.png
+   :align: center
+   :alt: Example CFG
+
+   Example CFG
+
+Another way to get this is to call
+"``Llvm_analysis.view_function_cfg f``" or
+"``Llvm_analysis.view_function_cfg_only f``" (where ``f`` is a
+"``Function``") either by inserting actual calls into the code and
+recompiling or by calling these in the debugger. LLVM has many nice
+features for visualizing various graphs.
+
+Getting back to the generated code, it is fairly simple: the entry block
+evaluates the conditional expression ("x" in our case here) and compares
+the result to 0.0 with the "``fcmp one``" instruction ('one' is "Ordered
+and Not Equal"). Based on the result of this expression, the code jumps
+to either the "then" or "else" blocks, which contain the expressions for
+the true/false cases.
+
+Once the then/else blocks are finished executing, they both branch back
+to the 'ifcont' block to execute the code that happens after the
+if/then/else. In this case the only thing left to do is to return to the
+caller of the function. The question then becomes: how does the code
+know which expression to return?
+
+The answer to this question involves an important SSA operation: the
+`Phi
+operation <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_.
+If you're not familiar with SSA, `the wikipedia
+article <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_
+is a good introduction and there are various other introductions to it
+available on your favorite search engine. The short version is that
+"execution" of the Phi operation requires "remembering" which block
+control came from. The Phi operation takes on the value corresponding to
+the input control block. In this case, if control comes in from the
+"then" block, it gets the value of "calltmp". If control comes from the
+"else" block, it gets the value of "calltmp1".
+
+At this point, you are probably starting to think "Oh no! This means my
+simple and elegant front-end will have to start generating SSA form in
+order to use LLVM!". Fortunately, this is not the case, and we strongly
+advise *not* implementing an SSA construction algorithm in your
+front-end unless there is an amazingly good reason to do so. In
+practice, there are two sorts of values that float around in code
+written for your average imperative programming language that might need
+Phi nodes:
+
+#. Code that involves user variables: ``x = 1; x = x + 1;``
+#. Values that are implicit in the structure of your AST, such as the
+   Phi node in this case.
+
+In `Chapter 7 <OCamlLangImpl7.html>`_ of this tutorial ("mutable
+variables"), we'll talk about #1 in depth. For now, just believe me that
+you don't need SSA construction to handle this case. For #2, you have
+the choice of using the techniques that we will describe for #1, or you
+can insert Phi nodes directly, if convenient. In this case, it is really
+really easy to generate the Phi node, so we choose to do it directly.
+
+Okay, enough of the motivation and overview, lets generate code!
+
+Code Generation for If/Then/Else
+--------------------------------
+
+In order to generate code for this, we implement the ``Codegen`` method
+for ``IfExprAST``:
+
+.. code-block:: ocaml
+
+    let rec codegen_expr = function
+      ...
+      | Ast.If (cond, then_, else_) ->
+          let cond = codegen_expr cond in
+
+          (* Convert condition to a bool by comparing equal to 0.0 *)
+          let zero = const_float double_type 0.0 in
+          let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
+
+This code is straightforward and similar to what we saw before. We emit
+the expression for the condition, then compare that value to zero to get
+a truth value as a 1-bit (bool) value.
+
+.. code-block:: ocaml
+
+          (* Grab the first block so that we might later add the conditional branch
+           * to it at the end of the function. *)
+          let start_bb = insertion_block builder in
+          let the_function = block_parent start_bb in
+
+          let then_bb = append_block context "then" the_function in
+          position_at_end then_bb builder;
+
+As opposed to the `C++ tutorial <LangImpl5.html>`_, we have to build our
+basic blocks bottom up since we can't have dangling BasicBlocks. We
+start off by saving a pointer to the first block (which might not be the
+entry block), which we'll need to build a conditional branch later. We
+do this by asking the ``builder`` for the current BasicBlock. The fourth
+line gets the current Function object that is being built. It gets this
+by the ``start_bb`` for its "parent" (the function it is currently
+embedded into).
+
+Once it has that, it creates one block. It is automatically appended
+into the function's list of blocks.
+
+.. code-block:: ocaml
+
+          (* Emit 'then' value. *)
+          position_at_end then_bb builder;
+          let then_val = codegen_expr then_ in
+
+          (* Codegen of 'then' can change the current block, update then_bb for the
+           * phi. We create a new name because one is used for the phi node, and the
+           * other is used for the conditional branch. *)
+          let new_then_bb = insertion_block builder in
+
+We move the builder to start inserting into the "then" block. Strictly
+speaking, this call moves the insertion point to be at the end of the
+specified block. However, since the "then" block is empty, it also
+starts out by inserting at the beginning of the block. :)
+
+Once the insertion point is set, we recursively codegen the "then"
+expression from the AST.
+
+The final line here is quite subtle, but is very important. The basic
+issue is that when we create the Phi node in the merge block, we need to
+set up the block/value pairs that indicate how the Phi will work.
+Importantly, the Phi node expects to have an entry for each predecessor
+of the block in the CFG. Why then, are we getting the current block when
+we just set it to ThenBB 5 lines above? The problem is that the "Then"
+expression may actually itself change the block that the Builder is
+emitting into if, for example, it contains a nested "if/then/else"
+expression. Because calling Codegen recursively could arbitrarily change
+the notion of the current block, we are required to get an up-to-date
+value for code that will set up the Phi node.
+
+.. code-block:: ocaml
+
+          (* Emit 'else' value. *)
+          let else_bb = append_block context "else" the_function in
+          position_at_end else_bb builder;
+          let else_val = codegen_expr else_ in
+
+          (* Codegen of 'else' can change the current block, update else_bb for the
+           * phi. *)
+          let new_else_bb = insertion_block builder in
+
+Code generation for the 'else' block is basically identical to codegen
+for the 'then' block.
+
+.. code-block:: ocaml
+
+          (* Emit merge block. *)
+          let merge_bb = append_block context "ifcont" the_function in
+          position_at_end merge_bb builder;
+          let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
+          let phi = build_phi incoming "iftmp" builder in
+
+The first two lines here are now familiar: the first adds the "merge"
+block to the Function object. The second block changes the insertion
+point so that newly created code will go into the "merge" block. Once
+that is done, we need to create the PHI node and set up the block/value
+pairs for the PHI.
+
+.. code-block:: ocaml
+
+          (* Return to the start block to add the conditional branch. *)
+          position_at_end start_bb builder;
+          ignore (build_cond_br cond_val then_bb else_bb builder);
+
+Once the blocks are created, we can emit the conditional branch that
+chooses between them. Note that creating new blocks does not implicitly
+affect the IRBuilder, so it is still inserting into the block that the
+condition went into. This is why we needed to save the "start" block.
+
+.. code-block:: ocaml
+
+          (* Set a unconditional branch at the end of the 'then' block and the
+           * 'else' block to the 'merge' block. *)
+          position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
+          position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
+
+          (* Finally, set the builder to the end of the merge block. *)
+          position_at_end merge_bb builder;
+
+          phi
+
+To finish off the blocks, we create an unconditional branch to the merge
+block. One interesting (and very important) aspect of the LLVM IR is
+that it `requires all basic blocks to be
+"terminated" <../LangRef.html#functionstructure>`_ with a `control flow
+instruction <../LangRef.html#terminators>`_ such as return or branch.
+This means that all control flow, *including fall throughs* must be made
+explicit in the LLVM IR. If you violate this rule, the verifier will
+emit an error.
+
+Finally, the CodeGen function returns the phi node as the value computed
+by the if/then/else expression. In our example above, this returned
+value will feed into the code for the top-level function, which will
+create the return instruction.
+
+Overall, we now have the ability to execute conditional code in
+Kaleidoscope. With this extension, Kaleidoscope is a fairly complete
+language that can calculate a wide variety of numeric functions. Next up
+we'll add another useful expression that is familiar from non-functional
+languages...
+
+'for' Loop Expression
+=====================
+
+Now that we know how to add basic control flow constructs to the
+language, we have the tools to add more powerful things. Lets add
+something more aggressive, a 'for' expression:
+
+::
+
+     extern putchard(char);
+     def printstar(n)
+       for i = 1, i < n, 1.0 in
+         putchard(42);  # ascii 42 = '*'
+
+     # print 100 '*' characters
+     printstar(100);
+
+This expression defines a new variable ("i" in this case) which iterates
+from a starting value, while the condition ("i < n" in this case) is
+true, incrementing by an optional step value ("1.0" in this case). If
+the step value is omitted, it defaults to 1.0. While the loop is true,
+it executes its body expression. Because we don't have anything better
+to return, we'll just define the loop as always returning 0.0. In the
+future when we have mutable variables, it will get more useful.
+
+As before, lets talk about the changes that we need to Kaleidoscope to
+support this.
+
+Lexer Extensions for the 'for' Loop
+-----------------------------------
+
+The lexer extensions are the same sort of thing as for if/then/else:
+
+.. code-block:: ocaml
+
+      ... in Token.token ...
+      (* control *)
+      | If | Then | Else
+      | For | In
+
+      ... in Lexer.lex_ident...
+          match Buffer.contents buffer with
+          | "def" -> [< 'Token.Def; stream >]
+          | "extern" -> [< 'Token.Extern; stream >]
+          | "if" -> [< 'Token.If; stream >]
+          | "then" -> [< 'Token.Then; stream >]
+          | "else" -> [< 'Token.Else; stream >]
+          | "for" -> [< 'Token.For; stream >]
+          | "in" -> [< 'Token.In; stream >]
+          | id -> [< 'Token.Ident id; stream >]
+
+AST Extensions for the 'for' Loop
+---------------------------------
+
+The AST variant is just as simple. It basically boils down to capturing
+the variable name and the constituent expressions in the node.
+
+.. code-block:: ocaml
+
+    type expr =
+      ...
+      (* variant for for/in. *)
+      | For of string * expr * expr * expr option * expr
+
+Parser Extensions for the 'for' Loop
+------------------------------------
+
+The parser code is also fairly standard. The only interesting thing here
+is handling of the optional step value. The parser code handles it by
+checking to see if the second comma is present. If not, it sets the step
+value to null in the AST node:
+
+.. code-block:: ocaml
+
+    let rec parse_primary = parser
+      ...
+      (* forexpr
+            ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
+      | [< 'Token.For;
+           'Token.Ident id ?? "expected identifier after for";
+           'Token.Kwd '=' ?? "expected '=' after for";
+           stream >] ->
+          begin parser
+            | [<
+                 start=parse_expr;
+                 'Token.Kwd ',' ?? "expected ',' after for";
+                 end_=parse_expr;
+                 stream >] ->
+                let step =
+                  begin parser
+                  | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+                  | [< >] -> None
+                  end stream
+                in
+                begin parser
+                | [< 'Token.In; body=parse_expr >] ->
+                    Ast.For (id, start, end_, step, body)
+                | [< >] ->
+                    raise (Stream.Error "expected 'in' after for")
+                end stream
+            | [< >] ->
+                raise (Stream.Error "expected '=' after for")
+          end stream
+
+LLVM IR for the 'for' Loop
+--------------------------
+
+Now we get to the good part: the LLVM IR we want to generate for this
+thing. With the simple example above, we get this LLVM IR (note that
+this dump is generated with optimizations disabled for clarity):
+
+.. code-block:: llvm
+
+    declare double @putchard(double)
+
+    define double @printstar(double %n) {
+    entry:
+            ; initial value = 1.0 (inlined into phi)
+      br label %loop
+
+    loop:    ; preds = %loop, %entry
+      %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
+            ; body
+      %calltmp = call double @putchard(double 4.200000e+01)
+            ; increment
+      %nextvar = fadd double %i, 1.000000e+00
+
+            ; termination test
+      %cmptmp = fcmp ult double %i, %n
+      %booltmp = uitofp i1 %cmptmp to double
+      %loopcond = fcmp one double %booltmp, 0.000000e+00
+      br i1 %loopcond, label %loop, label %afterloop
+
+    afterloop:    ; preds = %loop
+            ; loop always returns 0.0
+      ret double 0.000000e+00
+    }
+
+This loop contains all the same constructs we saw before: a phi node,
+several expressions, and some basic blocks. Lets see how this fits
+together.
+
+Code Generation for the 'for' Loop
+----------------------------------
+
+The first part of Codegen is very simple: we just output the start
+expression for the loop value:
+
+.. code-block:: ocaml
+
+    let rec codegen_expr = function
+      ...
+      | Ast.For (var_name, start, end_, step, body) ->
+          (* Emit the start code first, without 'variable' in scope. *)
+          let start_val = codegen_expr start in
+
+With this out of the way, the next step is to set up the LLVM basic
+block for the start of the loop body. In the case above, the whole loop
+body is one block, but remember that the body code itself could consist
+of multiple blocks (e.g. if it contains an if/then/else or a for/in
+expression).
+
+.. code-block:: ocaml
+
+          (* Make the new basic block for the loop header, inserting after current
+           * block. *)
+          let preheader_bb = insertion_block builder in
+          let the_function = block_parent preheader_bb in
+          let loop_bb = append_block context "loop" the_function in
+
+          (* Insert an explicit fall through from the current block to the
+           * loop_bb. *)
+          ignore (build_br loop_bb builder);
+
+This code is similar to what we saw for if/then/else. Because we will
+need it to create the Phi node, we remember the block that falls through
+into the loop. Once we have that, we create the actual block that starts
+the loop and create an unconditional branch for the fall-through between
+the two blocks.
+
+.. code-block:: ocaml
+
+          (* Start insertion in loop_bb. *)
+          position_at_end loop_bb builder;
+
+          (* Start the PHI node with an entry for start. *)
+          let variable = build_phi [(start_val, preheader_bb)] var_name builder in
+
+Now that the "preheader" for the loop is set up, we switch to emitting
+code for the loop body. To begin with, we move the insertion point and
+create the PHI node for the loop induction variable. Since we already
+know the incoming value for the starting value, we add it to the Phi
+node. Note that the Phi will eventually get a second value for the
+backedge, but we can't set it up yet (because it doesn't exist!).
+
+.. code-block:: ocaml
+
+          (* Within the loop, the variable is defined equal to the PHI node. If it
+           * shadows an existing variable, we have to restore it, so save it
+           * now. *)
+          let old_val =
+            try Some (Hashtbl.find named_values var_name) with Not_found -> None
+          in
+          Hashtbl.add named_values var_name variable;
+
+          (* Emit the body of the loop.  This, like any other expr, can change the
+           * current BB.  Note that we ignore the value computed by the body, but
+           * don't allow an error *)
+          ignore (codegen_expr body);
+
+Now the code starts to get more interesting. Our 'for' loop introduces a
+new variable to the symbol table. This means that our symbol table can
+now contain either function arguments or loop variables. To handle this,
+before we codegen the body of the loop, we add the loop variable as the
+current value for its name. Note that it is possible that there is a
+variable of the same name in the outer scope. It would be easy to make
+this an error (emit an error and return null if there is already an
+entry for VarName) but we choose to allow shadowing of variables. In
+order to handle this correctly, we remember the Value that we are
+potentially shadowing in ``old_val`` (which will be None if there is no
+shadowed variable).
+
+Once the loop variable is set into the symbol table, the code
+recursively codegen's the body. This allows the body to use the loop
+variable: any references to it will naturally find it in the symbol
+table.
+
+.. code-block:: ocaml
+
+          (* Emit the step value. *)
+          let step_val =
+            match step with
+            | Some step -> codegen_expr step
+            (* If not specified, use 1.0. *)
+            | None -> const_float double_type 1.0
+          in
+
+          let next_var = build_add variable step_val "nextvar" builder in
+
+Now that the body is emitted, we compute the next value of the iteration
+variable by adding the step value, or 1.0 if it isn't present.
+'``next_var``' will be the value of the loop variable on the next
+iteration of the loop.
+
+.. code-block:: ocaml
+
+          (* Compute the end condition. *)
+          let end_cond = codegen_expr end_ in
+
+          (* Convert condition to a bool by comparing equal to 0.0. *)
+          let zero = const_float double_type 0.0 in
+          let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
+
+Finally, we evaluate the exit value of the loop, to determine whether
+the loop should exit. This mirrors the condition evaluation for the
+if/then/else statement.
+
+.. code-block:: ocaml
+
+          (* Create the "after loop" block and insert it. *)
+          let loop_end_bb = insertion_block builder in
+          let after_bb = append_block context "afterloop" the_function in
+
+          (* Insert the conditional branch into the end of loop_end_bb. *)
+          ignore (build_cond_br end_cond loop_bb after_bb builder);
+
+          (* Any new code will be inserted in after_bb. *)
+          position_at_end after_bb builder;
+
+With the code for the body of the loop complete, we just need to finish
+up the control flow for it. This code remembers the end block (for the
+phi node), then creates the block for the loop exit ("afterloop"). Based
+on the value of the exit condition, it creates a conditional branch that
+chooses between executing the loop again and exiting the loop. Any
+future code is emitted in the "afterloop" block, so it sets the
+insertion position to it.
+
+.. code-block:: ocaml
+
+          (* Add a new entry to the PHI node for the backedge. *)
+          add_incoming (next_var, loop_end_bb) variable;
+
+          (* Restore the unshadowed variable. *)
+          begin match old_val with
+          | Some old_val -> Hashtbl.add named_values var_name old_val
+          | None -> ()
+          end;
+
+          (* for expr always returns 0.0. *)
+          const_null double_type
+
+The final code handles various cleanups: now that we have the
+"``next_var``" value, we can add the incoming value to the loop PHI
+node. After that, we remove the loop variable from the symbol table, so
+that it isn't in scope after the for loop. Finally, code generation of
+the for loop always returns 0.0, so that is what we return from
+``Codegen.codegen_expr``.
+
+With this, we conclude the "adding control flow to Kaleidoscope" chapter
+of the tutorial. In this chapter we added two control flow constructs,
+and used them to motivate a couple of aspects of the LLVM IR that are
+important for front-end implementors to know. In the next chapter of our
+saga, we will get a bit crazier and add `user-defined
+operators <OCamlLangImpl6.html>`_ to our poor innocent language.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the if/then/else and for expressions.. To build this example, use:
+
+.. code-block:: bash
+
+    # Compile
+    ocamlbuild toy.byte
+    # Run
+    ./toy.byte
+
+Here is the code:
+
+\_tags:
+    ::
+
+        <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+        <*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+        <*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+        <*.{byte,native}>: use_llvm_scalar_opts, use_bindings
+
+myocamlbuild.ml:
+    .. code-block:: ocaml
+
+        open Ocamlbuild_plugin;;
+
+        ocaml_lib ~extern:true "llvm";;
+        ocaml_lib ~extern:true "llvm_analysis";;
+        ocaml_lib ~extern:true "llvm_executionengine";;
+        ocaml_lib ~extern:true "llvm_target";;
+        ocaml_lib ~extern:true "llvm_scalar_opts";;
+
+        flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
+        dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
+
+token.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Lexer Tokens
+         *===----------------------------------------------------------------------===*)
+
+        (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
+         * these others for known things. *)
+        type token =
+          (* commands *)
+          | Def | Extern
+
+          (* primary *)
+          | Ident of string | Number of float
+
+          (* unknown *)
+          | Kwd of char
+
+          (* control *)
+          | If | Then | Else
+          | For | In
+
+lexer.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Lexer
+         *===----------------------------------------------------------------------===*)
+
+        let rec lex = parser
+          (* Skip any whitespace. *)
+          | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+          (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+          | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+              let buffer = Buffer.create 1 in
+              Buffer.add_char buffer c;
+              lex_ident buffer stream
+
+          (* number: [0-9.]+ *)
+          | [< ' ('0' .. '9' as c); stream >] ->
+              let buffer = Buffer.create 1 in
+              Buffer.add_char buffer c;
+              lex_number buffer stream
+
+          (* Comment until end of line. *)
+          | [< ' ('#'); stream >] ->
+              lex_comment stream
+
+          (* Otherwise, just return the character as its ascii value. *)
+          | [< 'c; stream >] ->
+              [< 'Token.Kwd c; lex stream >]
+
+          (* end of stream. *)
+          | [< >] -> [< >]
+
+        and lex_number buffer = parser
+          | [< ' ('0' .. '9' | '.' as c); stream >] ->
+              Buffer.add_char buffer c;
+              lex_number buffer stream
+          | [< stream=lex >] ->
+              [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+        and lex_ident buffer = parser
+          | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+              Buffer.add_char buffer c;
+              lex_ident buffer stream
+          | [< stream=lex >] ->
+              match Buffer.contents buffer with
+              | "def" -> [< 'Token.Def; stream >]
+              | "extern" -> [< 'Token.Extern; stream >]
+              | "if" -> [< 'Token.If; stream >]
+              | "then" -> [< 'Token.Then; stream >]
+              | "else" -> [< 'Token.Else; stream >]
+              | "for" -> [< 'Token.For; stream >]
+              | "in" -> [< 'Token.In; stream >]
+              | id -> [< 'Token.Ident id; stream >]
+
+        and lex_comment = parser
+          | [< ' ('\n'); stream=lex >] -> stream
+          | [< 'c; e=lex_comment >] -> e
+          | [< >] -> [< >]
+
+ast.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Abstract Syntax Tree (aka Parse Tree)
+         *===----------------------------------------------------------------------===*)
+
+        (* expr - Base type for all expression nodes. *)
+        type expr =
+          (* variant for numeric literals like "1.0". *)
+          | Number of float
+
+          (* variant for referencing a variable, like "a". *)
+          | Variable of string
+
+          (* variant for a binary operator. *)
+          | Binary of char * expr * expr
+
+          (* variant for function calls. *)
+          | Call of string * expr array
+
+          (* variant for if/then/else. *)
+          | If of expr * expr * expr
+
+          (* variant for for/in. *)
+          | For of string * expr * expr * expr option * expr
+
+        (* proto - This type represents the "prototype" for a function, which captures
+         * its name, and its argument names (thus implicitly the number of arguments the
+         * function takes). *)
+        type proto = Prototype of string * string array
+
+        (* func - This type represents a function definition itself. *)
+        type func = Function of proto * expr
+
+parser.ml:
+    .. code-block:: ocaml
+
+        (*===---------------------------------------------------------------------===
+         * Parser
+         *===---------------------------------------------------------------------===*)
+
+        (* binop_precedence - This holds the precedence for each binary operator that is
+         * defined *)
+        let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+        (* precedence - Get the precedence of the pending binary operator token. *)
+        let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+        (* primary
+         *   ::= identifier
+         *   ::= numberexpr
+         *   ::= parenexpr
+         *   ::= ifexpr
+         *   ::= forexpr *)
+        let rec parse_primary = parser
+          (* numberexpr ::= number *)
+          | [< 'Token.Number n >] -> Ast.Number n
+
+          (* parenexpr ::= '(' expression ')' *)
+          | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+          (* identifierexpr
+           *   ::= identifier
+           *   ::= identifier '(' argumentexpr ')' *)
+          | [< 'Token.Ident id; stream >] ->
+              let rec parse_args accumulator = parser
+                | [< e=parse_expr; stream >] ->
+                    begin parser
+                      | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+                      | [< >] -> e :: accumulator
+                    end stream
+                | [< >] -> accumulator
+              in
+              let rec parse_ident id = parser
+                (* Call. *)
+                | [< 'Token.Kwd '(';
+                     args=parse_args [];
+                     'Token.Kwd ')' ?? "expected ')'">] ->
+                    Ast.Call (id, Array.of_list (List.rev args))
+
+                (* Simple variable ref. *)
+                | [< >] -> Ast.Variable id
+              in
+              parse_ident id stream
+
+          (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+          | [< 'Token.If; c=parse_expr;
+               'Token.Then ?? "expected 'then'"; t=parse_expr;
+               'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+              Ast.If (c, t, e)
+
+          (* forexpr
+                ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
+          | [< 'Token.For;
+               'Token.Ident id ?? "expected identifier after for";
+               'Token.Kwd '=' ?? "expected '=' after for";
+               stream >] ->
+              begin parser
+                | [<
+                     start=parse_expr;
+                     'Token.Kwd ',' ?? "expected ',' after for";
+                     end_=parse_expr;
+                     stream >] ->
+                    let step =
+                      begin parser
+                      | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+                      | [< >] -> None
+                      end stream
+                    in
+                    begin parser
+                    | [< 'Token.In; body=parse_expr >] ->
+                        Ast.For (id, start, end_, step, body)
+                    | [< >] ->
+                        raise (Stream.Error "expected 'in' after for")
+                    end stream
+                | [< >] ->
+                    raise (Stream.Error "expected '=' after for")
+              end stream
+
+          | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+        (* binoprhs
+         *   ::= ('+' primary)* *)
+        and parse_bin_rhs expr_prec lhs stream =
+          match Stream.peek stream with
+          (* If this is a binop, find its precedence. *)
+          | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+              let token_prec = precedence c in
+
+              (* If this is a binop that binds at least as tightly as the current binop,
+               * consume it, otherwise we are done. *)
+              if token_prec < expr_prec then lhs else begin
+                (* Eat the binop. *)
+                Stream.junk stream;
+
+                (* Parse the primary expression after the binary operator. *)
+                let rhs = parse_primary stream in
+
+                (* Okay, we know this is a binop. *)
+                let rhs =
+                  match Stream.peek stream with
+                  | Some (Token.Kwd c2) ->
+                      (* If BinOp binds less tightly with rhs than the operator after
+                       * rhs, let the pending operator take rhs as its lhs. *)
+                      let next_prec = precedence c2 in
+                      if token_prec < next_prec
+                      then parse_bin_rhs (token_prec + 1) rhs stream
+                      else rhs
+                  | _ -> rhs
+                in
+
+                (* Merge lhs/rhs. *)
+                let lhs = Ast.Binary (c, lhs, rhs) in
+                parse_bin_rhs expr_prec lhs stream
+              end
+          | _ -> lhs
+
+        (* expression
+         *   ::= primary binoprhs *)
+        and parse_expr = parser
+          | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+        (* prototype
+         *   ::= id '(' id* ')' *)
+        let parse_prototype =
+          let rec parse_args accumulator = parser
+            | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+            | [< >] -> accumulator
+          in
+
+          parser
+          | [< 'Token.Ident id;
+               'Token.Kwd '(' ?? "expected '(' in prototype";
+               args=parse_args [];
+               'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+              (* success. *)
+              Ast.Prototype (id, Array.of_list (List.rev args))
+
+          | [< >] ->
+              raise (Stream.Error "expected function name in prototype")
+
+        (* definition ::= 'def' prototype expression *)
+        let parse_definition = parser
+          | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+              Ast.Function (p, e)
+
+        (* toplevelexpr ::= expression *)
+        let parse_toplevel = parser
+          | [< e=parse_expr >] ->
+              (* Make an anonymous proto. *)
+              Ast.Function (Ast.Prototype ("", [||]), e)
+
+        (*  external ::= 'extern' prototype *)
+        let parse_extern = parser
+          | [< 'Token.Extern; e=parse_prototype >] -> e
+
+codegen.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Code Generation
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+
+        exception Error of string
+
+        let context = global_context ()
+        let the_module = create_module context "my cool jit"
+        let builder = builder context
+        let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+        let double_type = double_type context
+
+        let rec codegen_expr = function
+          | Ast.Number n -> const_float double_type n
+          | Ast.Variable name ->
+              (try Hashtbl.find named_values name with
+                | Not_found -> raise (Error "unknown variable name"))
+          | Ast.Binary (op, lhs, rhs) ->
+              let lhs_val = codegen_expr lhs in
+              let rhs_val = codegen_expr rhs in
+              begin
+                match op with
+                | '+' -> build_add lhs_val rhs_val "addtmp" builder
+                | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+                | '*' -> build_mul lhs_val rhs_val "multmp" builder
+                | '<' ->
+                    (* Convert bool 0/1 to double 0.0 or 1.0 *)
+                    let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+                    build_uitofp i double_type "booltmp" builder
+                | _ -> raise (Error "invalid binary operator")
+              end
+          | Ast.Call (callee, args) ->
+              (* Look up the name in the module table. *)
+              let callee =
+                match lookup_function callee the_module with
+                | Some callee -> callee
+                | None -> raise (Error "unknown function referenced")
+              in
+              let params = params callee in
+
+              (* If argument mismatch error. *)
+              if Array.length params == Array.length args then () else
+                raise (Error "incorrect # arguments passed");
+              let args = Array.map codegen_expr args in
+              build_call callee args "calltmp" builder
+          | Ast.If (cond, then_, else_) ->
+              let cond = codegen_expr cond in
+
+              (* Convert condition to a bool by comparing equal to 0.0 *)
+              let zero = const_float double_type 0.0 in
+              let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
+
+              (* Grab the first block so that we might later add the conditional branch
+               * to it at the end of the function. *)
+              let start_bb = insertion_block builder in
+              let the_function = block_parent start_bb in
+
+              let then_bb = append_block context "then" the_function in
+
+              (* Emit 'then' value. *)
+              position_at_end then_bb builder;
+              let then_val = codegen_expr then_ in
+
+              (* Codegen of 'then' can change the current block, update then_bb for the
+               * phi. We create a new name because one is used for the phi node, and the
+               * other is used for the conditional branch. *)
+              let new_then_bb = insertion_block builder in
+
+              (* Emit 'else' value. *)
+              let else_bb = append_block context "else" the_function in
+              position_at_end else_bb builder;
+              let else_val = codegen_expr else_ in
+
+              (* Codegen of 'else' can change the current block, update else_bb for the
+               * phi. *)
+              let new_else_bb = insertion_block builder in
+
+              (* Emit merge block. *)
+              let merge_bb = append_block context "ifcont" the_function in
+              position_at_end merge_bb builder;
+              let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
+              let phi = build_phi incoming "iftmp" builder in
+
+              (* Return to the start block to add the conditional branch. *)
+              position_at_end start_bb builder;
+              ignore (build_cond_br cond_val then_bb else_bb builder);
+
+              (* Set a unconditional branch at the end of the 'then' block and the
+               * 'else' block to the 'merge' block. *)
+              position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
+              position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
+
+              (* Finally, set the builder to the end of the merge block. *)
+              position_at_end merge_bb builder;
+
+              phi
+          | Ast.For (var_name, start, end_, step, body) ->
+              (* Emit the start code first, without 'variable' in scope. *)
+              let start_val = codegen_expr start in
+
+              (* Make the new basic block for the loop header, inserting after current
+               * block. *)
+              let preheader_bb = insertion_block builder in
+              let the_function = block_parent preheader_bb in
+              let loop_bb = append_block context "loop" the_function in
+
+              (* Insert an explicit fall through from the current block to the
+               * loop_bb. *)
+              ignore (build_br loop_bb builder);
+
+              (* Start insertion in loop_bb. *)
+              position_at_end loop_bb builder;
+
+              (* Start the PHI node with an entry for start. *)
+              let variable = build_phi [(start_val, preheader_bb)] var_name builder in
+
+              (* Within the loop, the variable is defined equal to the PHI node. If it
+               * shadows an existing variable, we have to restore it, so save it
+               * now. *)
+              let old_val =
+                try Some (Hashtbl.find named_values var_name) with Not_found -> None
+              in
+              Hashtbl.add named_values var_name variable;
+
+              (* Emit the body of the loop.  This, like any other expr, can change the
+               * current BB.  Note that we ignore the value computed by the body, but
+               * don't allow an error *)
+              ignore (codegen_expr body);
+
+              (* Emit the step value. *)
+              let step_val =
+                match step with
+                | Some step -> codegen_expr step
+                (* If not specified, use 1.0. *)
+                | None -> const_float double_type 1.0
+              in
+
+              let next_var = build_add variable step_val "nextvar" builder in
+
+              (* Compute the end condition. *)
+              let end_cond = codegen_expr end_ in
+
+              (* Convert condition to a bool by comparing equal to 0.0. *)
+              let zero = const_float double_type 0.0 in
+              let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
+
+              (* Create the "after loop" block and insert it. *)
+              let loop_end_bb = insertion_block builder in
+              let after_bb = append_block context "afterloop" the_function in
+
+              (* Insert the conditional branch into the end of loop_end_bb. *)
+              ignore (build_cond_br end_cond loop_bb after_bb builder);
+
+              (* Any new code will be inserted in after_bb. *)
+              position_at_end after_bb builder;
+
+              (* Add a new entry to the PHI node for the backedge. *)
+              add_incoming (next_var, loop_end_bb) variable;
+
+              (* Restore the unshadowed variable. *)
+              begin match old_val with
+              | Some old_val -> Hashtbl.add named_values var_name old_val
+              | None -> ()
+              end;
+
+              (* for expr always returns 0.0. *)
+              const_null double_type
+
+        let codegen_proto = function
+          | Ast.Prototype (name, args) ->
+              (* Make the function type: double(double,double) etc. *)
+              let doubles = Array.make (Array.length args) double_type in
+              let ft = function_type double_type doubles in
+              let f =
+                match lookup_function name the_module with
+                | None -> declare_function name ft the_module
+
+                (* If 'f' conflicted, there was already something named 'name'. If it
+                 * has a body, don't allow redefinition or reextern. *)
+                | Some f ->
+                    (* If 'f' already has a body, reject this. *)
+                    if block_begin f <> At_end f then
+                      raise (Error "redefinition of function");
+
+                    (* If 'f' took a different number of arguments, reject. *)
+                    if element_type (type_of f) <> ft then
+                      raise (Error "redefinition of function with different # args");
+                    f
+              in
+
+              (* Set names for all arguments. *)
+              Array.iteri (fun i a ->
+                let n = args.(i) in
+                set_value_name n a;
+                Hashtbl.add named_values n a;
+              ) (params f);
+              f
+
+        let codegen_func the_fpm = function
+          | Ast.Function (proto, body) ->
+              Hashtbl.clear named_values;
+              let the_function = codegen_proto proto in
+
+              (* Create a new basic block to start insertion into. *)
+              let bb = append_block context "entry" the_function in
+              position_at_end bb builder;
+
+              try
+                let ret_val = codegen_expr body in
+
+                (* Finish off the function. *)
+                let _ = build_ret ret_val builder in
+
+                (* Validate the generated code, checking for consistency. *)
+                Llvm_analysis.assert_valid_function the_function;
+
+                (* Optimize the function. *)
+                let _ = PassManager.run_function the_function the_fpm in
+
+                the_function
+              with e ->
+                delete_function the_function;
+                raise e
+
+toplevel.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Top-Level parsing and JIT Driver
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+        open Llvm_executionengine
+
+        (* top ::= definition | external | expression | ';' *)
+        let rec main_loop the_fpm the_execution_engine stream =
+          match Stream.peek stream with
+          | None -> ()
+
+          (* ignore top-level semicolons. *)
+          | Some (Token.Kwd ';') ->
+              Stream.junk stream;
+              main_loop the_fpm the_execution_engine stream
+
+          | Some token ->
+              begin
+                try match token with
+                | Token.Def ->
+                    let e = Parser.parse_definition stream in
+                    print_endline "parsed a function definition.";
+                    dump_value (Codegen.codegen_func the_fpm e);
+                | Token.Extern ->
+                    let e = Parser.parse_extern stream in
+                    print_endline "parsed an extern.";
+                    dump_value (Codegen.codegen_proto e);
+                | _ ->
+                    (* Evaluate a top-level expression into an anonymous function. *)
+                    let e = Parser.parse_toplevel stream in
+                    print_endline "parsed a top-level expr";
+                    let the_function = Codegen.codegen_func the_fpm e in
+                    dump_value the_function;
+
+                    (* JIT the function, returning a function pointer. *)
+                    let result = ExecutionEngine.run_function the_function [||]
+                      the_execution_engine in
+
+                    print_string "Evaluated to ";
+                    print_float (GenericValue.as_float Codegen.double_type result);
+                    print_newline ();
+                with Stream.Error s | Codegen.Error s ->
+                  (* Skip token for error recovery. *)
+                  Stream.junk stream;
+                  print_endline s;
+              end;
+              print_string "ready> "; flush stdout;
+              main_loop the_fpm the_execution_engine stream
+
+toy.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Main driver code.
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+        open Llvm_executionengine
+        open Llvm_target
+        open Llvm_scalar_opts
+
+        let main () =
+          ignore (initialize_native_target ());
+
+          (* Install standard binary operators.
+           * 1 is the lowest precedence. *)
+          Hashtbl.add Parser.binop_precedence '<' 10;
+          Hashtbl.add Parser.binop_precedence '+' 20;
+          Hashtbl.add Parser.binop_precedence '-' 20;
+          Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+
+          (* Prime the first token. *)
+          print_string "ready> "; flush stdout;
+          let stream = Lexer.lex (Stream.of_channel stdin) in
+
+          (* Create the JIT. *)
+          let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+          let the_fpm = PassManager.create_function Codegen.the_module in
+
+          (* Set up the optimizer pipeline.  Start with registering info about how the
+           * target lays out data structures. *)
+          DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+          (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+          add_instruction_combination the_fpm;
+
+          (* reassociate expressions. *)
+          add_reassociation the_fpm;
+
+          (* Eliminate Common SubExpressions. *)
+          add_gvn the_fpm;
+
+          (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+          add_cfg_simplification the_fpm;
+
+          ignore (PassManager.initialize the_fpm);
+
+          (* Run the main "interpreter loop" now. *)
+          Toplevel.main_loop the_fpm the_execution_engine stream;
+
+          (* Print out all the generated code. *)
+          dump_module Codegen.the_module
+        ;;
+
+        main ()
+
+bindings.c
+    .. code-block:: c
+
+        #include <stdio.h>
+
+        /* putchard - putchar that takes a double and returns 0. */
+        extern double putchard(double X) {
+          putchar((char)X);
+          return 0;
+        }
+
+`Next: Extending the language: user-defined
+operators <OCamlLangImpl6.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl6.html b/docs/tutorial/OCamlLangImpl6.html
deleted file mode 100644
index 56883d539b..0000000000
--- a/docs/tutorial/OCamlLangImpl6.html
+++ /dev/null
@@ -1,1574 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Extending the Language: User-defined Operators</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <meta name="author" content="Erick Tryzelaar">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Extending the Language: User-defined Operators</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 6
-  <ol>
-    <li><a href="#intro">Chapter 6 Introduction</a></li>
-    <li><a href="#idea">User-defined Operators: the Idea</a></li>
-    <li><a href="#binary">User-defined Binary Operators</a></li>
-    <li><a href="#unary">User-defined Unary Operators</a></li>
-    <li><a href="#example">Kicking the Tires</a></li>
-    <li><a href="#code">Full Code Listing</a></li>
-  </ol>
-</li>
-<li><a href="OCamlLangImpl7.html">Chapter 7</a>: Extending the Language: Mutable
-Variables / SSA Construction</li>
-</ul>
-
-<div class="doc_author">
-	<p>
-		Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
-		and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
-	</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 6 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 6 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial.  At this point in our tutorial, we now have a fully
-functional language that is fairly minimal, but also useful.  There
-is still one big problem with it, however. Our language doesn't have many
-useful operators (like division, logical negation, or even any comparisons
-besides less-than).</p>
-
-<p>This chapter of the tutorial takes a wild digression into adding user-defined
-operators to the simple and beautiful Kaleidoscope language. This digression now
-gives us a simple and ugly language in some ways, but also a powerful one at the
-same time.  One of the great things about creating your own language is that you
-get to decide what is good or bad.  In this tutorial we'll assume that it is
-okay to use this as a way to show some interesting parsing techniques.</p>
-
-<p>At the end of this tutorial, we'll run through an example Kaleidoscope
-application that <a href="#example">renders the Mandelbrot set</a>.  This gives
-an example of what you can build with Kaleidoscope and its feature set.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="idea">User-defined Operators: the Idea</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The "operator overloading" that we will add to Kaleidoscope is more general than
-languages like C++.  In C++, you are only allowed to redefine existing
-operators: you can't programatically change the grammar, introduce new
-operators, change precedence levels, etc.  In this chapter, we will add this
-capability to Kaleidoscope, which will let the user round out the set of
-operators that are supported.</p>
-
-<p>The point of going into user-defined operators in a tutorial like this is to
-show the power and flexibility of using a hand-written parser.  Thus far, the parser
-we have been implementing uses recursive descent for most parts of the grammar and
-operator precedence parsing for the expressions.  See <a
-href="OCamlLangImpl2.html">Chapter 2</a> for details.  Without using operator
-precedence parsing, it would be very difficult to allow the programmer to
-introduce new operators into the grammar: the grammar is dynamically extensible
-as the JIT runs.</p>
-
-<p>The two specific features we'll add are programmable unary operators (right
-now, Kaleidoscope has no unary operators at all) as well as binary operators.
-An example of this is:</p>
-
-<div class="doc_code">
-<pre>
-# Logical unary not.
-def unary!(v)
-  if v then
-    0
-  else
-    1;
-
-# Define &gt; with the same precedence as &lt;.
-def binary&gt; 10 (LHS RHS)
-  RHS &lt; LHS;
-
-# Binary "logical or", (note that it does not "short circuit")
-def binary| 5 (LHS RHS)
-  if LHS then
-    1
-  else if RHS then
-    1
-  else
-    0;
-
-# Define = with slightly lower precedence than relationals.
-def binary= 9 (LHS RHS)
-  !(LHS &lt; RHS | LHS &gt; RHS);
-</pre>
-</div>
-
-<p>Many languages aspire to being able to implement their standard runtime
-library in the language itself.  In Kaleidoscope, we can implement significant
-parts of the language in the library!</p>
-
-<p>We will break down implementation of these features into two parts:
-implementing support for user-defined binary operators and adding unary
-operators.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="binary">User-defined Binary Operators</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Adding support for user-defined binary operators is pretty simple with our
-current framework.  We'll first add support for the unary/binary keywords:</p>
-
-<div class="doc_code">
-<pre>
-type token =
-  ...
-  <b>(* operators *)
-  | Binary | Unary</b>
-
-...
-
-and lex_ident buffer = parser
-  ...
-      | "for" -&gt; [&lt; 'Token.For; stream &gt;]
-      | "in" -&gt; [&lt; 'Token.In; stream &gt;]
-      <b>| "binary" -&gt; [&lt; 'Token.Binary; stream &gt;]
-      | "unary" -&gt; [&lt; 'Token.Unary; stream &gt;]</b>
-</pre>
-</div>
-
-<p>This just adds lexer support for the unary and binary keywords, like we
-did in <a href="OCamlLangImpl5.html#iflexer">previous chapters</a>.  One nice
-thing about our current AST, is that we represent binary operators with full
-generalisation by using their ASCII code as the opcode.  For our extended
-operators, we'll use this same representation, so we don't need any new AST or
-parser support.</p>
-
-<p>On the other hand, we have to be able to represent the definitions of these
-new operators, in the "def binary| 5" part of the function definition.  In our
-grammar so far, the "name" for the function definition is parsed as the
-"prototype" production and into the <tt>Ast.Prototype</tt> AST node.  To
-represent our new user-defined operators as prototypes, we have to extend
-the  <tt>Ast.Prototype</tt> AST node like this:</p>
-
-<div class="doc_code">
-<pre>
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto =
-  | Prototype of string * string array
-  <b>| BinOpPrototype of string * string array * int</b>
-</pre>
-</div>
-
-<p>Basically, in addition to knowing a name for the prototype, we now keep track
-of whether it was an operator, and if it was, what precedence level the operator
-is at.  The precedence is only used for binary operators (as you'll see below,
-it just doesn't apply for unary operators).  Now that we have a way to represent
-the prototype for a user-defined operator, we need to parse it:</p>
-
-<div class="doc_code">
-<pre>
-(* prototype
- *   ::= id '(' id* ')'
- <b>*   ::= binary LETTER number? (id, id)
- *   ::= unary LETTER number? (id) *)</b>
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
-    | [&lt; &gt;] -&gt; accumulator
-  in
-  let parse_operator = parser
-    | [&lt; 'Token.Unary &gt;] -&gt; "unary", 1
-    | [&lt; 'Token.Binary &gt;] -&gt; "binary", 2
-  in
-  let parse_binary_precedence = parser
-    | [&lt; 'Token.Number n &gt;] -&gt; int_of_float n
-    | [&lt; &gt;] -&gt; 30
-  in
-  parser
-  | [&lt; 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-  <b>| [&lt; (prefix, kind)=parse_operator;
-       'Token.Kwd op ?? "expected an operator";
-       (* Read the precedence if present. *)
-       binary_precedence=parse_binary_precedence;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-        args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      let name = prefix ^ (String.make 1 op) in
-      let args = Array.of_list (List.rev args) in
-
-      (* Verify right number of arguments for operator. *)
-      if Array.length args != kind
-      then raise (Stream.Error "invalid number of operands for operator")
-      else
-        if kind == 1 then
-          Ast.Prototype (name, args)
-        else
-          Ast.BinOpPrototype (name, args, binary_precedence)</b>
-  | [&lt; &gt;] -&gt;
-      raise (Stream.Error "expected function name in prototype")
-</pre>
-</div>
-
-<p>This is all fairly straightforward parsing code, and we have already seen
-a lot of similar code in the past.  One interesting part about the code above is
-the couple lines that set up <tt>name</tt> for binary operators.  This builds
-names like "binary@" for a newly defined "@" operator.  This then takes
-advantage of the fact that symbol names in the LLVM symbol table are allowed to
-have any character in them, including embedded nul characters.</p>
-
-<p>The next interesting thing to add, is codegen support for these binary
-operators.  Given our current structure, this is a simple addition of a default
-case for our existing binary operator node:</p>
-
-<div class="doc_code">
-<pre>
-let codegen_expr = function
-  ...
-  | Ast.Binary (op, lhs, rhs) -&gt;
-      let lhs_val = codegen_expr lhs in
-      let rhs_val = codegen_expr rhs in
-      begin
-        match op with
-        | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
-        | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
-        | '&lt;' -&gt;
-            (* Convert bool 0/1 to double 0.0 or 1.0 *)
-            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-            build_uitofp i double_type "booltmp" builder
-        <b>| _ -&gt;
-            (* If it wasn't a builtin binary operator, it must be a user defined
-             * one. Emit a call to it. *)
-            let callee = "binary" ^ (String.make 1 op) in
-            let callee =
-              match lookup_function callee the_module with
-              | Some callee -&gt; callee
-              | None -&gt; raise (Error "binary operator not found!")
-            in
-            build_call callee [|lhs_val; rhs_val|] "binop" builder</b>
-      end
-</pre>
-</div>
-
-<p>As you can see above, the new code is actually really simple.  It just does
-a lookup for the appropriate operator in the symbol table and generates a
-function call to it.  Since user-defined operators are just built as normal
-functions (because the "prototype" boils down to a function with the right
-name) everything falls into place.</p>
-
-<p>The final piece of code we are missing, is a bit of top level magic:</p>
-
-<div class="doc_code">
-<pre>
-let codegen_func the_fpm = function
-  | Ast.Function (proto, body) -&gt;
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-
-      <b>(* If this is an operator, install it. *)
-      begin match proto with
-      | Ast.BinOpPrototype (name, args, prec) -&gt;
-          let op = name.[String.length name - 1] in
-          Hashtbl.add Parser.binop_precedence op prec;
-      | _ -&gt; ()
-      end;</b>
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-      ...
-</pre>
-</div>
-
-<p>Basically, before codegening a function, if it is a user-defined operator, we
-register it in the precedence table.  This allows the binary operator parsing
-logic we already have in place to handle it.  Since we are working on a
-fully-general operator precedence parser, this is all we need to do to "extend
-the grammar".</p>
-
-<p>Now we have useful user-defined binary operators.  This builds a lot
-on the previous framework we built for other operators.  Adding unary operators
-is a bit more challenging, because we don't have any framework for it yet - lets
-see what it takes.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="unary">User-defined Unary Operators</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Since we don't currently support unary operators in the Kaleidoscope
-language, we'll need to add everything to support them.  Above, we added simple
-support for the 'unary' keyword to the lexer.  In addition to that, we need an
-AST node:</p>
-
-<div class="doc_code">
-<pre>
-type expr =
-  ...
-  (* variant for a unary operator. *)
-  | Unary of char * expr
-  ...
-</pre>
-</div>
-
-<p>This AST node is very simple and obvious by now.  It directly mirrors the
-binary operator AST node, except that it only has one child.  With this, we
-need to add the parsing logic.  Parsing a unary operator is pretty simple: we'll
-add a new function to do it:</p>
-
-<div class="doc_code">
-<pre>
-(* unary
- *   ::= primary
- *   ::= '!' unary *)
-and parse_unary = parser
-  (* If this is a unary operator, read it. *)
-  | [&lt; 'Token.Kwd op when op != '(' &amp;&amp; op != ')'; operand=parse_expr &gt;] -&gt;
-      Ast.Unary (op, operand)
-
-  (* If the current token is not an operator, it must be a primary expr. *)
-  | [&lt; stream &gt;] -&gt; parse_primary stream
-</pre>
-</div>
-
-<p>The grammar we add is pretty straightforward here.  If we see a unary
-operator when parsing a primary operator, we eat the operator as a prefix and
-parse the remaining piece as another unary operator.  This allows us to handle
-multiple unary operators (e.g. "!!x").  Note that unary operators can't have
-ambiguous parses like binary operators can, so there is no need for precedence
-information.</p>
-
-<p>The problem with this function, is that we need to call ParseUnary from
-somewhere.  To do this, we change previous callers of ParsePrimary to call
-<tt>parse_unary</tt> instead:</p>
-
-<div class="doc_code">
-<pre>
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-        ...
-        <b>(* Parse the unary expression after the binary operator. *)
-        let rhs = parse_unary stream in</b>
-        ...
-
-...
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [&lt; lhs=<b>parse_unary</b>; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-</pre>
-</div>
-
-<p>With these two simple changes, we are now able to parse unary operators and build the
-AST for them.  Next up, we need to add parser support for prototypes, to parse
-the unary operator prototype.  We extend the binary operator code above
-with:</p>
-
-<div class="doc_code">
-<pre>
-(* prototype
- *   ::= id '(' id* ')'
- *   ::= binary LETTER number? (id, id)
- <b>*   ::= unary LETTER number? (id)</b> *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
-    | [&lt; &gt;] -&gt; accumulator
-  in
-  <b>let parse_operator = parser
-    | [&lt; 'Token.Unary &gt;] -&gt; "unary", 1
-    | [&lt; 'Token.Binary &gt;] -&gt; "binary", 2
-  in</b>
-  let parse_binary_precedence = parser
-    | [&lt; 'Token.Number n &gt;] -&gt; int_of_float n
-    | [&lt; &gt;] -&gt; 30
-  in
-  parser
-  | [&lt; 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-  <b>| [&lt; (prefix, kind)=parse_operator;
-       'Token.Kwd op ?? "expected an operator";
-       (* Read the precedence if present. *)
-       binary_precedence=parse_binary_precedence;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-        args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      let name = prefix ^ (String.make 1 op) in
-      let args = Array.of_list (List.rev args) in
-
-      (* Verify right number of arguments for operator. *)
-      if Array.length args != kind
-      then raise (Stream.Error "invalid number of operands for operator")
-      else
-        if kind == 1 then
-          Ast.Prototype (name, args)
-        else
-          Ast.BinOpPrototype (name, args, binary_precedence)</b>
-  | [&lt; &gt;] -&gt;
-      raise (Stream.Error "expected function name in prototype")
-</pre>
-</div>
-
-<p>As with binary operators, we name unary operators with a name that includes
-the operator character.  This assists us at code generation time.  Speaking of,
-the final piece we need to add is codegen support for unary operators.  It looks
-like this:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
-  ...
-  | Ast.Unary (op, operand) -&gt;
-      let operand = codegen_expr operand in
-      let callee = "unary" ^ (String.make 1 op) in
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -&gt; callee
-        | None -&gt; raise (Error "unknown unary operator")
-      in
-      build_call callee [|operand|] "unop" builder
-</pre>
-</div>
-
-<p>This code is similar to, but simpler than, the code for binary operators.  It
-is simpler primarily because it doesn't need to handle any predefined operators.
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="example">Kicking the Tires</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>It is somewhat hard to believe, but with a few simple extensions we've
-covered in the last chapters, we have grown a real-ish language.  With this, we
-can do a lot of interesting things, including I/O, math, and a bunch of other
-things.  For example, we can now add a nice sequencing operator (printd is
-defined to print out the specified value and a newline):</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>extern printd(x);</b>
-Read extern: declare double @printd(double)
-ready&gt; <b>def binary : 1 (x y) 0;  # Low-precedence operator that ignores operands.</b>
-..
-ready&gt; <b>printd(123) : printd(456) : printd(789);</b>
-123.000000
-456.000000
-789.000000
-Evaluated to 0.000000
-</pre>
-</div>
-
-<p>We can also define a bunch of other "primitive" operations, such as:</p>
-
-<div class="doc_code">
-<pre>
-# Logical unary not.
-def unary!(v)
-  if v then
-    0
-  else
-    1;
-
-# Unary negate.
-def unary-(v)
-  0-v;
-
-# Define &gt; with the same precedence as &lt;.
-def binary&gt; 10 (LHS RHS)
-  RHS &lt; LHS;
-
-# Binary logical or, which does not short circuit.
-def binary| 5 (LHS RHS)
-  if LHS then
-    1
-  else if RHS then
-    1
-  else
-    0;
-
-# Binary logical and, which does not short circuit.
-def binary&amp; 6 (LHS RHS)
-  if !LHS then
-    0
-  else
-    !!RHS;
-
-# Define = with slightly lower precedence than relationals.
-def binary = 9 (LHS RHS)
-  !(LHS &lt; RHS | LHS &gt; RHS);
-
-</pre>
-</div>
-
-
-<p>Given the previous if/then/else support, we can also define interesting
-functions for I/O.  For example, the following prints out a character whose
-"density" reflects the value passed in: the lower the value, the denser the
-character:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt;
-<b>
-extern putchard(char)
-def printdensity(d)
-  if d &gt; 8 then
-    putchard(32)  # ' '
-  else if d &gt; 4 then
-    putchard(46)  # '.'
-  else if d &gt; 2 then
-    putchard(43)  # '+'
-  else
-    putchard(42); # '*'</b>
-...
-ready&gt; <b>printdensity(1): printdensity(2): printdensity(3) :
-          printdensity(4): printdensity(5): printdensity(9): putchard(10);</b>
-*++..
-Evaluated to 0.000000
-</pre>
-</div>
-
-<p>Based on these simple primitive operations, we can start to define more
-interesting things.  For example, here's a little function that solves for the
-number of iterations it takes a function in the complex plane to
-converge:</p>
-
-<div class="doc_code">
-<pre>
-# determine whether the specific location diverges.
-# Solve for z = z^2 + c in the complex plane.
-def mandleconverger(real imag iters creal cimag)
-  if iters &gt; 255 | (real*real + imag*imag &gt; 4) then
-    iters
-  else
-    mandleconverger(real*real - imag*imag + creal,
-                    2*real*imag + cimag,
-                    iters+1, creal, cimag);
-
-# return the number of iterations required for the iteration to escape
-def mandleconverge(real imag)
-  mandleconverger(real, imag, 0, real, imag);
-</pre>
-</div>
-
-<p>This "z = z<sup>2</sup> + c" function is a beautiful little creature that is the basis
-for computation of the <a
-href="http://en.wikipedia.org/wiki/Mandelbrot_set">Mandelbrot Set</a>.  Our
-<tt>mandelconverge</tt> function returns the number of iterations that it takes
-for a complex orbit to escape, saturating to 255.  This is not a very useful
-function by itself, but if you plot its value over a two-dimensional plane,
-you can see the Mandelbrot set.  Given that we are limited to using putchard
-here, our amazing graphical output is limited, but we can whip together
-something using the density plotter above:</p>
-
-<div class="doc_code">
-<pre>
-# compute and plot the mandlebrot set with the specified 2 dimensional range
-# info.
-def mandelhelp(xmin xmax xstep   ymin ymax ystep)
-  for y = ymin, y &lt; ymax, ystep in (
-    (for x = xmin, x &lt; xmax, xstep in
-       printdensity(mandleconverge(x,y)))
-    : putchard(10)
-  )
-
-# mandel - This is a convenient helper function for plotting the mandelbrot set
-# from the specified position with the specified Magnification.
-def mandel(realstart imagstart realmag imagmag)
-  mandelhelp(realstart, realstart+realmag*78, realmag,
-             imagstart, imagstart+imagmag*40, imagmag);
-</pre>
-</div>
-
-<p>Given this, we can try plotting out the mandlebrot set!  Lets try it out:</p>
-
-<div class="doc_code">
-<pre>
-ready&gt; <b>mandel(-2.3, -1.3, 0.05, 0.07);</b>
-*******************************+++++++++++*************************************
-*************************+++++++++++++++++++++++*******************************
-**********************+++++++++++++++++++++++++++++****************************
-*******************+++++++++++++++++++++.. ...++++++++*************************
-*****************++++++++++++++++++++++.... ...+++++++++***********************
-***************+++++++++++++++++++++++.....   ...+++++++++*********************
-**************+++++++++++++++++++++++....     ....+++++++++********************
-*************++++++++++++++++++++++......      .....++++++++*******************
-************+++++++++++++++++++++.......       .......+++++++******************
-***********+++++++++++++++++++....                ... .+++++++*****************
-**********+++++++++++++++++.......                     .+++++++****************
-*********++++++++++++++...........                    ...+++++++***************
-********++++++++++++............                      ...++++++++**************
-********++++++++++... ..........                        .++++++++**************
-*******+++++++++.....                                   .+++++++++*************
-*******++++++++......                                  ..+++++++++*************
-*******++++++.......                                   ..+++++++++*************
-*******+++++......                                     ..+++++++++*************
-*******.... ....                                      ...+++++++++*************
-*******.... .                                         ...+++++++++*************
-*******+++++......                                    ...+++++++++*************
-*******++++++.......                                   ..+++++++++*************
-*******++++++++......                                   .+++++++++*************
-*******+++++++++.....                                  ..+++++++++*************
-********++++++++++... ..........                        .++++++++**************
-********++++++++++++............                      ...++++++++**************
-*********++++++++++++++..........                     ...+++++++***************
-**********++++++++++++++++........                     .+++++++****************
-**********++++++++++++++++++++....                ... ..+++++++****************
-***********++++++++++++++++++++++.......       .......++++++++*****************
-************+++++++++++++++++++++++......      ......++++++++******************
-**************+++++++++++++++++++++++....      ....++++++++********************
-***************+++++++++++++++++++++++.....   ...+++++++++*********************
-*****************++++++++++++++++++++++....  ...++++++++***********************
-*******************+++++++++++++++++++++......++++++++*************************
-*********************++++++++++++++++++++++.++++++++***************************
-*************************+++++++++++++++++++++++*******************************
-******************************+++++++++++++************************************
-*******************************************************************************
-*******************************************************************************
-*******************************************************************************
-Evaluated to 0.000000
-ready&gt; <b>mandel(-2, -1, 0.02, 0.04);</b>
-**************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
-***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-*********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
-*******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
-*****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
-***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
-**************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
-************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
-***********++++++++++++++++++++++++++++++++++++++++++++++++++........        .
-**********++++++++++++++++++++++++++++++++++++++++++++++.............
-********+++++++++++++++++++++++++++++++++++++++++++..................
-*******+++++++++++++++++++++++++++++++++++++++.......................
-******+++++++++++++++++++++++++++++++++++...........................
-*****++++++++++++++++++++++++++++++++............................
-*****++++++++++++++++++++++++++++...............................
-****++++++++++++++++++++++++++......   .........................
-***++++++++++++++++++++++++.........     ......    ...........
-***++++++++++++++++++++++............
-**+++++++++++++++++++++..............
-**+++++++++++++++++++................
-*++++++++++++++++++.................
-*++++++++++++++++............ ...
-*++++++++++++++..............
-*+++....++++................
-*..........  ...........
-*
-*..........  ...........
-*+++....++++................
-*++++++++++++++..............
-*++++++++++++++++............ ...
-*++++++++++++++++++.................
-**+++++++++++++++++++................
-**+++++++++++++++++++++..............
-***++++++++++++++++++++++............
-***++++++++++++++++++++++++.........     ......    ...........
-****++++++++++++++++++++++++++......   .........................
-*****++++++++++++++++++++++++++++...............................
-*****++++++++++++++++++++++++++++++++............................
-******+++++++++++++++++++++++++++++++++++...........................
-*******+++++++++++++++++++++++++++++++++++++++.......................
-********+++++++++++++++++++++++++++++++++++++++++++..................
-Evaluated to 0.000000
-ready&gt; <b>mandel(-0.9, -1.4, 0.02, 0.03);</b>
-*******************************************************************************
-*******************************************************************************
-*******************************************************************************
-**********+++++++++++++++++++++************************************************
-*+++++++++++++++++++++++++++++++++++++++***************************************
-+++++++++++++++++++++++++++++++++++++++++++++**********************************
-++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
-++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
-+++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
-+++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
-+++++++++++++++++++++++++++++++....   ......+++++++++++++++++++****************
-+++++++++++++++++++++++++++++.......  ........+++++++++++++++++++**************
-++++++++++++++++++++++++++++........   ........++++++++++++++++++++************
-+++++++++++++++++++++++++++.........     ..  ...+++++++++++++++++++++**********
-++++++++++++++++++++++++++...........        ....++++++++++++++++++++++********
-++++++++++++++++++++++++.............       .......++++++++++++++++++++++******
-+++++++++++++++++++++++.............        ........+++++++++++++++++++++++****
-++++++++++++++++++++++...........           ..........++++++++++++++++++++++***
-++++++++++++++++++++...........                .........++++++++++++++++++++++*
-++++++++++++++++++............                  ...........++++++++++++++++++++
-++++++++++++++++...............                 .............++++++++++++++++++
-++++++++++++++.................                 ...............++++++++++++++++
-++++++++++++..................                  .................++++++++++++++
-+++++++++..................                      .................+++++++++++++
-++++++........        .                               .........  ..++++++++++++
-++............                                         ......    ....++++++++++
-..............                                                    ...++++++++++
-..............                                                    ....+++++++++
-..............                                                    .....++++++++
-.............                                                    ......++++++++
-...........                                                     .......++++++++
-.........                                                       ........+++++++
-.........                                                       ........+++++++
-.........                                                           ....+++++++
-........                                                             ...+++++++
-.......                                                              ...+++++++
-                                                                    ....+++++++
-                                                                   .....+++++++
-                                                                    ....+++++++
-                                                                    ....+++++++
-                                                                    ....+++++++
-Evaluated to 0.000000
-ready&gt; <b>^D</b>
-</pre>
-</div>
-
-<p>At this point, you may be starting to realize that Kaleidoscope is a real
-and powerful language.  It may not be self-similar :), but it can be used to
-plot things that are!</p>
-
-<p>With this, we conclude the "adding user-defined operators" chapter of the
-tutorial.  We have successfully augmented our language, adding the ability to
-extend the language in the library, and we have shown how this can be used to
-build a simple but interesting end-user application in Kaleidoscope.  At this
-point, Kaleidoscope can build a variety of applications that are functional and
-can call functions with side-effects, but it can't actually define and mutate a
-variable itself.</p>
-
-<p>Strikingly, variable mutation is an important feature of some
-languages, and it is not at all obvious how to <a href="OCamlLangImpl7.html">add
-support for mutable variables</a> without having to add an "SSA construction"
-phase to your front-end.  In the next chapter, we will describe how you can
-add variable mutation without building SSA in your front-end.</p>
-
-</div>
-
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with the
-if/then/else and for expressions..  To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<dl>
-<dt>_tags:</dt>
-<dd class="doc_code">
-<pre>
-&lt;{lexer,parser}.ml&gt;: use_camlp4, pp(camlp4of)
-&lt;*.{byte,native}&gt;: g++, use_llvm, use_llvm_analysis
-&lt;*.{byte,native}&gt;: use_llvm_executionengine, use_llvm_target
-&lt;*.{byte,native}&gt;: use_llvm_scalar_opts, use_bindings
-</pre>
-</dd>
-
-<dt>myocamlbuild.ml:</dt>
-<dd class="doc_code">
-<pre>
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-ocaml_lib ~extern:true "llvm_executionengine";;
-ocaml_lib ~extern:true "llvm_target";;
-ocaml_lib ~extern:true "llvm_scalar_opts";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
-dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
-</pre>
-</dd>
-
-<dt>token.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
-  (* commands *)
-  | Def | Extern
-
-  (* primary *)
-  | Ident of string | Number of float
-
-  (* unknown *)
-  | Kwd of char
-
-  (* control *)
-  | If | Then | Else
-  | For | In
-
-  (* operators *)
-  | Binary | Unary
-</pre>
-</dd>
-
-<dt>lexer.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-  (* number: [0-9.]+ *)
-  | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-  (* Comment until end of line. *)
-  | [&lt; ' ('#'); stream &gt;] -&gt;
-      lex_comment stream
-
-  (* Otherwise, just return the character as its ascii value. *)
-  | [&lt; 'c; stream &gt;] -&gt;
-      [&lt; 'Token.Kwd c; lex stream &gt;]
-
-  (* end of stream. *)
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-
-and lex_number buffer = parser
-  | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-
-and lex_ident buffer = parser
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      match Buffer.contents buffer with
-      | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
-      | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
-      | "if" -&gt; [&lt; 'Token.If; stream &gt;]
-      | "then" -&gt; [&lt; 'Token.Then; stream &gt;]
-      | "else" -&gt; [&lt; 'Token.Else; stream &gt;]
-      | "for" -&gt; [&lt; 'Token.For; stream &gt;]
-      | "in" -&gt; [&lt; 'Token.In; stream &gt;]
-      | "binary" -&gt; [&lt; 'Token.Binary; stream &gt;]
-      | "unary" -&gt; [&lt; 'Token.Unary; stream &gt;]
-      | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-
-and lex_comment = parser
-  | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
-  | [&lt; 'c; e=lex_comment &gt;] -&gt; e
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</dd>
-
-<dt>ast.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a unary operator. *)
-  | Unary of char * expr
-
-  (* variant for a binary operator. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls. *)
-  | Call of string * expr array
-
-  (* variant for if/then/else. *)
-  | If of expr * expr * expr
-
-  (* variant for for/in. *)
-  | For of string * expr * expr * expr option * expr
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto =
-  | Prototype of string * string array
-  | BinOpPrototype of string * string array * int
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</dd>
-
-<dt>parser.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr
- *   ::= ifexpr
- *   ::= forexpr *)
-let rec parse_primary = parser
-  (* numberexpr ::= number *)
-  | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [&lt; 'Token.Ident id; stream &gt;] -&gt;
-      let rec parse_args accumulator = parser
-        | [&lt; e=parse_expr; stream &gt;] -&gt;
-            begin parser
-              | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
-              | [&lt; &gt;] -&gt; e :: accumulator
-            end stream
-        | [&lt; &gt;] -&gt; accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [&lt; 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [&lt; &gt;] -&gt; Ast.Variable id
-      in
-      parse_ident id stream
-
-  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
-  | [&lt; 'Token.If; c=parse_expr;
-       'Token.Then ?? "expected 'then'"; t=parse_expr;
-       'Token.Else ?? "expected 'else'"; e=parse_expr &gt;] -&gt;
-      Ast.If (c, t, e)
-
-  (* forexpr
-        ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
-  | [&lt; 'Token.For;
-       'Token.Ident id ?? "expected identifier after for";
-       'Token.Kwd '=' ?? "expected '=' after for";
-       stream &gt;] -&gt;
-      begin parser
-        | [&lt;
-             start=parse_expr;
-             'Token.Kwd ',' ?? "expected ',' after for";
-             end_=parse_expr;
-             stream &gt;] -&gt;
-            let step =
-              begin parser
-              | [&lt; 'Token.Kwd ','; step=parse_expr &gt;] -&gt; Some step
-              | [&lt; &gt;] -&gt; None
-              end stream
-            in
-            begin parser
-            | [&lt; 'Token.In; body=parse_expr &gt;] -&gt;
-                Ast.For (id, start, end_, step, body)
-            | [&lt; &gt;] -&gt;
-                raise (Stream.Error "expected 'in' after for")
-            end stream
-        | [&lt; &gt;] -&gt;
-            raise (Stream.Error "expected '=' after for")
-      end stream
-
-  | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-
-(* unary
- *   ::= primary
- *   ::= '!' unary *)
-and parse_unary = parser
-  (* If this is a unary operator, read it. *)
-  | [&lt; 'Token.Kwd op when op != '(' &amp;&amp; op != ')'; operand=parse_expr &gt;] -&gt;
-      Ast.Unary (op, operand)
-
-  (* If the current token is not an operator, it must be a primary expr. *)
-  | [&lt; stream &gt;] -&gt; parse_primary stream
-
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec &lt; expr_prec then lhs else begin
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Parse the unary expression after the binary operator. *)
-        let rhs = parse_unary stream in
-
-        (* Okay, we know this is a binop. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) -&gt;
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec &lt; next_prec
-              then parse_bin_rhs (token_prec + 1) rhs stream
-              else rhs
-          | _ -&gt; rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-  | _ -&gt; lhs
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [&lt; lhs=parse_unary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-
-(* prototype
- *   ::= id '(' id* ')'
- *   ::= binary LETTER number? (id, id)
- *   ::= unary LETTER number? (id) *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
-    | [&lt; &gt;] -&gt; accumulator
-  in
-  let parse_operator = parser
-    | [&lt; 'Token.Unary &gt;] -&gt; "unary", 1
-    | [&lt; 'Token.Binary &gt;] -&gt; "binary", 2
-  in
-  let parse_binary_precedence = parser
-    | [&lt; 'Token.Number n &gt;] -&gt; int_of_float n
-    | [&lt; &gt;] -&gt; 30
-  in
-  parser
-  | [&lt; 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-  | [&lt; (prefix, kind)=parse_operator;
-       'Token.Kwd op ?? "expected an operator";
-       (* Read the precedence if present. *)
-       binary_precedence=parse_binary_precedence;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-        args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      let name = prefix ^ (String.make 1 op) in
-      let args = Array.of_list (List.rev args) in
-
-      (* Verify right number of arguments for operator. *)
-      if Array.length args != kind
-      then raise (Stream.Error "invalid number of operands for operator")
-      else
-        if kind == 1 then
-          Ast.Prototype (name, args)
-        else
-          Ast.BinOpPrototype (name, args, binary_precedence)
-  | [&lt; &gt;] -&gt;
-      raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
-  | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
-      Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
-  | [&lt; e=parse_expr &gt;] -&gt;
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-(*  external ::= 'extern' prototype *)
-let parse_extern = parser
-  | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</dd>
-
-<dt>codegen.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-let rec codegen_expr = function
-  | Ast.Number n -&gt; const_float double_type n
-  | Ast.Variable name -&gt;
-      (try Hashtbl.find named_values name with
-        | Not_found -&gt; raise (Error "unknown variable name"))
-  | Ast.Unary (op, operand) -&gt;
-      let operand = codegen_expr operand in
-      let callee = "unary" ^ (String.make 1 op) in
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -&gt; callee
-        | None -&gt; raise (Error "unknown unary operator")
-      in
-      build_call callee [|operand|] "unop" builder
-  | Ast.Binary (op, lhs, rhs) -&gt;
-      let lhs_val = codegen_expr lhs in
-      let rhs_val = codegen_expr rhs in
-      begin
-        match op with
-        | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
-        | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
-        | '&lt;' -&gt;
-            (* Convert bool 0/1 to double 0.0 or 1.0 *)
-            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-            build_uitofp i double_type "booltmp" builder
-        | _ -&gt;
-            (* If it wasn't a builtin binary operator, it must be a user defined
-             * one. Emit a call to it. *)
-            let callee = "binary" ^ (String.make 1 op) in
-            let callee =
-              match lookup_function callee the_module with
-              | Some callee -&gt; callee
-              | None -&gt; raise (Error "binary operator not found!")
-            in
-            build_call callee [|lhs_val; rhs_val|] "binop" builder
-      end
-  | Ast.Call (callee, args) -&gt;
-      (* Look up the name in the module table. *)
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -&gt; callee
-        | None -&gt; raise (Error "unknown function referenced")
-      in
-      let params = params callee in
-
-      (* If argument mismatch error. *)
-      if Array.length params == Array.length args then () else
-        raise (Error "incorrect # arguments passed");
-      let args = Array.map codegen_expr args in
-      build_call callee args "calltmp" builder
-  | Ast.If (cond, then_, else_) -&gt;
-      let cond = codegen_expr cond in
-
-      (* Convert condition to a bool by comparing equal to 0.0 *)
-      let zero = const_float double_type 0.0 in
-      let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
-
-      (* Grab the first block so that we might later add the conditional branch
-       * to it at the end of the function. *)
-      let start_bb = insertion_block builder in
-      let the_function = block_parent start_bb in
-
-      let then_bb = append_block context "then" the_function in
-
-      (* Emit 'then' value. *)
-      position_at_end then_bb builder;
-      let then_val = codegen_expr then_ in
-
-      (* Codegen of 'then' can change the current block, update then_bb for the
-       * phi. We create a new name because one is used for the phi node, and the
-       * other is used for the conditional branch. *)
-      let new_then_bb = insertion_block builder in
-
-      (* Emit 'else' value. *)
-      let else_bb = append_block context "else" the_function in
-      position_at_end else_bb builder;
-      let else_val = codegen_expr else_ in
-
-      (* Codegen of 'else' can change the current block, update else_bb for the
-       * phi. *)
-      let new_else_bb = insertion_block builder in
-
-      (* Emit merge block. *)
-      let merge_bb = append_block context "ifcont" the_function in
-      position_at_end merge_bb builder;
-      let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
-      let phi = build_phi incoming "iftmp" builder in
-
-      (* Return to the start block to add the conditional branch. *)
-      position_at_end start_bb builder;
-      ignore (build_cond_br cond_val then_bb else_bb builder);
-
-      (* Set a unconditional branch at the end of the 'then' block and the
-       * 'else' block to the 'merge' block. *)
-      position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
-      position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
-
-      (* Finally, set the builder to the end of the merge block. *)
-      position_at_end merge_bb builder;
-
-      phi
-  | Ast.For (var_name, start, end_, step, body) -&gt;
-      (* Emit the start code first, without 'variable' in scope. *)
-      let start_val = codegen_expr start in
-
-      (* Make the new basic block for the loop header, inserting after current
-       * block. *)
-      let preheader_bb = insertion_block builder in
-      let the_function = block_parent preheader_bb in
-      let loop_bb = append_block context "loop" the_function in
-
-      (* Insert an explicit fall through from the current block to the
-       * loop_bb. *)
-      ignore (build_br loop_bb builder);
-
-      (* Start insertion in loop_bb. *)
-      position_at_end loop_bb builder;
-
-      (* Start the PHI node with an entry for start. *)
-      let variable = build_phi [(start_val, preheader_bb)] var_name builder in
-
-      (* Within the loop, the variable is defined equal to the PHI node. If it
-       * shadows an existing variable, we have to restore it, so save it
-       * now. *)
-      let old_val =
-        try Some (Hashtbl.find named_values var_name) with Not_found -&gt; None
-      in
-      Hashtbl.add named_values var_name variable;
-
-      (* Emit the body of the loop.  This, like any other expr, can change the
-       * current BB.  Note that we ignore the value computed by the body, but
-       * don't allow an error *)
-      ignore (codegen_expr body);
-
-      (* Emit the step value. *)
-      let step_val =
-        match step with
-        | Some step -&gt; codegen_expr step
-        (* If not specified, use 1.0. *)
-        | None -&gt; const_float double_type 1.0
-      in
-
-      let next_var = build_add variable step_val "nextvar" builder in
-
-      (* Compute the end condition. *)
-      let end_cond = codegen_expr end_ in
-
-      (* Convert condition to a bool by comparing equal to 0.0. *)
-      let zero = const_float double_type 0.0 in
-      let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
-
-      (* Create the "after loop" block and insert it. *)
-      let loop_end_bb = insertion_block builder in
-      let after_bb = append_block context "afterloop" the_function in
-
-      (* Insert the conditional branch into the end of loop_end_bb. *)
-      ignore (build_cond_br end_cond loop_bb after_bb builder);
-
-      (* Any new code will be inserted in after_bb. *)
-      position_at_end after_bb builder;
-
-      (* Add a new entry to the PHI node for the backedge. *)
-      add_incoming (next_var, loop_end_bb) variable;
-
-      (* Restore the unshadowed variable. *)
-      begin match old_val with
-      | Some old_val -&gt; Hashtbl.add named_values var_name old_val
-      | None -&gt; ()
-      end;
-
-      (* for expr always returns 0.0. *)
-      const_null double_type
-
-let codegen_proto = function
-  | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -&gt;
-      (* Make the function type: double(double,double) etc. *)
-      let doubles = Array.make (Array.length args) double_type in
-      let ft = function_type double_type doubles in
-      let f =
-        match lookup_function name the_module with
-        | None -&gt; declare_function name ft the_module
-
-        (* If 'f' conflicted, there was already something named 'name'. If it
-         * has a body, don't allow redefinition or reextern. *)
-        | Some f -&gt;
-            (* If 'f' already has a body, reject this. *)
-            if block_begin f &lt;&gt; At_end f then
-              raise (Error "redefinition of function");
-
-            (* If 'f' took a different number of arguments, reject. *)
-            if element_type (type_of f) &lt;&gt; ft then
-              raise (Error "redefinition of function with different # args");
-            f
-      in
-
-      (* Set names for all arguments. *)
-      Array.iteri (fun i a -&gt;
-        let n = args.(i) in
-        set_value_name n a;
-        Hashtbl.add named_values n a;
-      ) (params f);
-      f
-
-let codegen_func the_fpm = function
-  | Ast.Function (proto, body) -&gt;
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-
-      (* If this is an operator, install it. *)
-      begin match proto with
-      | Ast.BinOpPrototype (name, args, prec) -&gt;
-          let op = name.[String.length name - 1] in
-          Hashtbl.add Parser.binop_precedence op prec;
-      | _ -&gt; ()
-      end;
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-
-      try
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        (* Optimize the function. *)
-        let _ = PassManager.run_function the_function the_fpm in
-
-        the_function
-      with e -&gt;
-        delete_function the_function;
-        raise e
-</pre>
-</dd>
-
-<dt>toplevel.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop the_fpm the_execution_engine stream =
-  match Stream.peek stream with
-  | None -&gt; ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') -&gt;
-      Stream.junk stream;
-      main_loop the_fpm the_execution_engine stream
-
-  | Some token -&gt;
-      begin
-        try match token with
-        | Token.Def -&gt;
-            let e = Parser.parse_definition stream in
-            print_endline "parsed a function definition.";
-            dump_value (Codegen.codegen_func the_fpm e);
-        | Token.Extern -&gt;
-            let e = Parser.parse_extern stream in
-            print_endline "parsed an extern.";
-            dump_value (Codegen.codegen_proto e);
-        | _ -&gt;
-            (* Evaluate a top-level expression into an anonymous function. *)
-            let e = Parser.parse_toplevel stream in
-            print_endline "parsed a top-level expr";
-            let the_function = Codegen.codegen_func the_fpm e in
-            dump_value the_function;
-
-            (* JIT the function, returning a function pointer. *)
-            let result = ExecutionEngine.run_function the_function [||]
-              the_execution_engine in
-
-            print_string "Evaluated to ";
-            print_float (GenericValue.as_float Codegen.double_type result);
-            print_newline ();
-        with Stream.Error s | Codegen.Error s -&gt;
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready&gt; "; flush stdout;
-      main_loop the_fpm the_execution_engine stream
-</pre>
-</dd>
-
-<dt>toy.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-open Llvm_scalar_opts
-
-let main () =
-  ignore (initialize_native_target ());
-
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '&lt;' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-
-  (* Prime the first token. *)
-  print_string "ready&gt; "; flush stdout;
-  let stream = Lexer.lex (Stream.of_channel stdin) in
-
-  (* Create the JIT. *)
-  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
-  let the_fpm = PassManager.create_function Codegen.the_module in
-
-  (* Set up the optimizer pipeline.  Start with registering info about how the
-   * target lays out data structures. *)
-  DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
-  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
-  add_instruction_combination the_fpm;
-
-  (* reassociate expressions. *)
-  add_reassociation the_fpm;
-
-  (* Eliminate Common SubExpressions. *)
-  add_gvn the_fpm;
-
-  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
-  add_cfg_simplification the_fpm;
-
-  ignore (PassManager.initialize the_fpm);
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop the_fpm the_execution_engine stream;
-
-  (* Print out all the generated code. *)
-  dump_module Codegen.the_module
-;;
-
-main ()
-</pre>
-</dd>
-
-<dt>bindings.c</dt>
-<dd class="doc_code">
-<pre>
-#include &lt;stdio.h&gt;
-
-/* putchard - putchar that takes a double and returns 0. */
-extern double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-/* printd - printf that takes a double prints it as "%f\n", returning 0. */
-extern double printd(double X) {
-  printf("%f\n", X);
-  return 0;
-}
-</pre>
-</dd>
-</dl>
-
-<a href="OCamlLangImpl7.html">Next: Extending the language: mutable variables /
-SSA construction</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl6.rst b/docs/tutorial/OCamlLangImpl6.rst
new file mode 100644
index 0000000000..7665647736
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl6.rst
@@ -0,0 +1,1444 @@
+============================================================
+Kaleidoscope: Extending the Language: User-defined Operators
+============================================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_ and `Erick
+Tryzelaar <mailto:idadesub@users.sourceforge.net>`_
+
+Chapter 6 Introduction
+======================
+
+Welcome to Chapter 6 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. At this point in our tutorial, we now
+have a fully functional language that is fairly minimal, but also
+useful. There is still one big problem with it, however. Our language
+doesn't have many useful operators (like division, logical negation, or
+even any comparisons besides less-than).
+
+This chapter of the tutorial takes a wild digression into adding
+user-defined operators to the simple and beautiful Kaleidoscope
+language. This digression now gives us a simple and ugly language in
+some ways, but also a powerful one at the same time. One of the great
+things about creating your own language is that you get to decide what
+is good or bad. In this tutorial we'll assume that it is okay to use
+this as a way to show some interesting parsing techniques.
+
+At the end of this tutorial, we'll run through an example Kaleidoscope
+application that `renders the Mandelbrot set <#example>`_. This gives an
+example of what you can build with Kaleidoscope and its feature set.
+
+User-defined Operators: the Idea
+================================
+
+The "operator overloading" that we will add to Kaleidoscope is more
+general than languages like C++. In C++, you are only allowed to
+redefine existing operators: you can't programatically change the
+grammar, introduce new operators, change precedence levels, etc. In this
+chapter, we will add this capability to Kaleidoscope, which will let the
+user round out the set of operators that are supported.
+
+The point of going into user-defined operators in a tutorial like this
+is to show the power and flexibility of using a hand-written parser.
+Thus far, the parser we have been implementing uses recursive descent
+for most parts of the grammar and operator precedence parsing for the
+expressions. See `Chapter 2 <OCamlLangImpl2.html>`_ for details. Without
+using operator precedence parsing, it would be very difficult to allow
+the programmer to introduce new operators into the grammar: the grammar
+is dynamically extensible as the JIT runs.
+
+The two specific features we'll add are programmable unary operators
+(right now, Kaleidoscope has no unary operators at all) as well as
+binary operators. An example of this is:
+
+::
+
+    # Logical unary not.
+    def unary!(v)
+      if v then
+        0
+      else
+        1;
+
+    # Define > with the same precedence as <.
+    def binary> 10 (LHS RHS)
+      RHS < LHS;
+
+    # Binary "logical or", (note that it does not "short circuit")
+    def binary| 5 (LHS RHS)
+      if LHS then
+        1
+      else if RHS then
+        1
+      else
+        0;
+
+    # Define = with slightly lower precedence than relationals.
+    def binary= 9 (LHS RHS)
+      !(LHS < RHS | LHS > RHS);
+
+Many languages aspire to being able to implement their standard runtime
+library in the language itself. In Kaleidoscope, we can implement
+significant parts of the language in the library!
+
+We will break down implementation of these features into two parts:
+implementing support for user-defined binary operators and adding unary
+operators.
+
+User-defined Binary Operators
+=============================
+
+Adding support for user-defined binary operators is pretty simple with
+our current framework. We'll first add support for the unary/binary
+keywords:
+
+.. code-block:: ocaml
+
+    type token =
+      ...
+      (* operators *)
+      | Binary | Unary
+
+    ...
+
+    and lex_ident buffer = parser
+      ...
+          | "for" -> [< 'Token.For; stream >]
+          | "in" -> [< 'Token.In; stream >]
+          | "binary" -> [< 'Token.Binary; stream >]
+          | "unary" -> [< 'Token.Unary; stream >]
+
+This just adds lexer support for the unary and binary keywords, like we
+did in `previous chapters <OCamlLangImpl5.html#iflexer>`_. One nice
+thing about our current AST, is that we represent binary operators with
+full generalisation by using their ASCII code as the opcode. For our
+extended operators, we'll use this same representation, so we don't need
+any new AST or parser support.
+
+On the other hand, we have to be able to represent the definitions of
+these new operators, in the "def binary\| 5" part of the function
+definition. In our grammar so far, the "name" for the function
+definition is parsed as the "prototype" production and into the
+``Ast.Prototype`` AST node. To represent our new user-defined operators
+as prototypes, we have to extend the ``Ast.Prototype`` AST node like
+this:
+
+.. code-block:: ocaml
+
+    (* proto - This type represents the "prototype" for a function, which captures
+     * its name, and its argument names (thus implicitly the number of arguments the
+     * function takes). *)
+    type proto =
+      | Prototype of string * string array
+      | BinOpPrototype of string * string array * int
+
+Basically, in addition to knowing a name for the prototype, we now keep
+track of whether it was an operator, and if it was, what precedence
+level the operator is at. The precedence is only used for binary
+operators (as you'll see below, it just doesn't apply for unary
+operators). Now that we have a way to represent the prototype for a
+user-defined operator, we need to parse it:
+
+.. code-block:: ocaml
+
+    (* prototype
+     *   ::= id '(' id* ')'
+     *   ::= binary LETTER number? (id, id)
+     *   ::= unary LETTER number? (id) *)
+    let parse_prototype =
+      let rec parse_args accumulator = parser
+        | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+        | [< >] -> accumulator
+      in
+      let parse_operator = parser
+        | [< 'Token.Unary >] -> "unary", 1
+        | [< 'Token.Binary >] -> "binary", 2
+      in
+      let parse_binary_precedence = parser
+        | [< 'Token.Number n >] -> int_of_float n
+        | [< >] -> 30
+      in
+      parser
+      | [< 'Token.Ident id;
+           'Token.Kwd '(' ?? "expected '(' in prototype";
+           args=parse_args [];
+           'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+          (* success. *)
+          Ast.Prototype (id, Array.of_list (List.rev args))
+      | [< (prefix, kind)=parse_operator;
+           'Token.Kwd op ?? "expected an operator";
+           (* Read the precedence if present. *)
+           binary_precedence=parse_binary_precedence;
+           'Token.Kwd '(' ?? "expected '(' in prototype";
+            args=parse_args [];
+           'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+          let name = prefix ^ (String.make 1 op) in
+          let args = Array.of_list (List.rev args) in
+
+          (* Verify right number of arguments for operator. *)
+          if Array.length args != kind
+          then raise (Stream.Error "invalid number of operands for operator")
+          else
+            if kind == 1 then
+              Ast.Prototype (name, args)
+            else
+              Ast.BinOpPrototype (name, args, binary_precedence)
+      | [< >] ->
+          raise (Stream.Error "expected function name in prototype")
+
+This is all fairly straightforward parsing code, and we have already
+seen a lot of similar code in the past. One interesting part about the
+code above is the couple lines that set up ``name`` for binary
+operators. This builds names like "binary@" for a newly defined "@"
+operator. This then takes advantage of the fact that symbol names in the
+LLVM symbol table are allowed to have any character in them, including
+embedded nul characters.
+
+The next interesting thing to add, is codegen support for these binary
+operators. Given our current structure, this is a simple addition of a
+default case for our existing binary operator node:
+
+.. code-block:: ocaml
+
+    let codegen_expr = function
+      ...
+      | Ast.Binary (op, lhs, rhs) ->
+          let lhs_val = codegen_expr lhs in
+          let rhs_val = codegen_expr rhs in
+          begin
+            match op with
+            | '+' -> build_add lhs_val rhs_val "addtmp" builder
+            | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+            | '*' -> build_mul lhs_val rhs_val "multmp" builder
+            | '<' ->
+                (* Convert bool 0/1 to double 0.0 or 1.0 *)
+                let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+                build_uitofp i double_type "booltmp" builder
+            | _ ->
+                (* If it wasn't a builtin binary operator, it must be a user defined
+                 * one. Emit a call to it. *)
+                let callee = "binary" ^ (String.make 1 op) in
+                let callee =
+                  match lookup_function callee the_module with
+                  | Some callee -> callee
+                  | None -> raise (Error "binary operator not found!")
+                in
+                build_call callee [|lhs_val; rhs_val|] "binop" builder
+          end
+
+As you can see above, the new code is actually really simple. It just
+does a lookup for the appropriate operator in the symbol table and
+generates a function call to it. Since user-defined operators are just
+built as normal functions (because the "prototype" boils down to a
+function with the right name) everything falls into place.
+
+The final piece of code we are missing, is a bit of top level magic:
+
+.. code-block:: ocaml
+
+    let codegen_func the_fpm = function
+      | Ast.Function (proto, body) ->
+          Hashtbl.clear named_values;
+          let the_function = codegen_proto proto in
+
+          (* If this is an operator, install it. *)
+          begin match proto with
+          | Ast.BinOpPrototype (name, args, prec) ->
+              let op = name.[String.length name - 1] in
+              Hashtbl.add Parser.binop_precedence op prec;
+          | _ -> ()
+          end;
+
+          (* Create a new basic block to start insertion into. *)
+          let bb = append_block context "entry" the_function in
+          position_at_end bb builder;
+          ...
+
+Basically, before codegening a function, if it is a user-defined
+operator, we register it in the precedence table. This allows the binary
+operator parsing logic we already have in place to handle it. Since we
+are working on a fully-general operator precedence parser, this is all
+we need to do to "extend the grammar".
+
+Now we have useful user-defined binary operators. This builds a lot on
+the previous framework we built for other operators. Adding unary
+operators is a bit more challenging, because we don't have any framework
+for it yet - lets see what it takes.
+
+User-defined Unary Operators
+============================
+
+Since we don't currently support unary operators in the Kaleidoscope
+language, we'll need to add everything to support them. Above, we added
+simple support for the 'unary' keyword to the lexer. In addition to
+that, we need an AST node:
+
+.. code-block:: ocaml
+
+    type expr =
+      ...
+      (* variant for a unary operator. *)
+      | Unary of char * expr
+      ...
+
+This AST node is very simple and obvious by now. It directly mirrors the
+binary operator AST node, except that it only has one child. With this,
+we need to add the parsing logic. Parsing a unary operator is pretty
+simple: we'll add a new function to do it:
+
+.. code-block:: ocaml
+
+    (* unary
+     *   ::= primary
+     *   ::= '!' unary *)
+    and parse_unary = parser
+      (* If this is a unary operator, read it. *)
+      | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] ->
+          Ast.Unary (op, operand)
+
+      (* If the current token is not an operator, it must be a primary expr. *)
+      | [< stream >] -> parse_primary stream
+
+The grammar we add is pretty straightforward here. If we see a unary
+operator when parsing a primary operator, we eat the operator as a
+prefix and parse the remaining piece as another unary operator. This
+allows us to handle multiple unary operators (e.g. "!!x"). Note that
+unary operators can't have ambiguous parses like binary operators can,
+so there is no need for precedence information.
+
+The problem with this function, is that we need to call ParseUnary from
+somewhere. To do this, we change previous callers of ParsePrimary to
+call ``parse_unary`` instead:
+
+.. code-block:: ocaml
+
+    (* binoprhs
+     *   ::= ('+' primary)* *)
+    and parse_bin_rhs expr_prec lhs stream =
+            ...
+            (* Parse the unary expression after the binary operator. *)
+            let rhs = parse_unary stream in
+            ...
+
+    ...
+
+    (* expression
+     *   ::= primary binoprhs *)
+    and parse_expr = parser
+      | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
+
+With these two simple changes, we are now able to parse unary operators
+and build the AST for them. Next up, we need to add parser support for
+prototypes, to parse the unary operator prototype. We extend the binary
+operator code above with:
+
+.. code-block:: ocaml
+
+    (* prototype
+     *   ::= id '(' id* ')'
+     *   ::= binary LETTER number? (id, id)
+     *   ::= unary LETTER number? (id) *)
+    let parse_prototype =
+      let rec parse_args accumulator = parser
+        | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+        | [< >] -> accumulator
+      in
+      let parse_operator = parser
+        | [< 'Token.Unary >] -> "unary", 1
+        | [< 'Token.Binary >] -> "binary", 2
+      in
+      let parse_binary_precedence = parser
+        | [< 'Token.Number n >] -> int_of_float n
+        | [< >] -> 30
+      in
+      parser
+      | [< 'Token.Ident id;
+           'Token.Kwd '(' ?? "expected '(' in prototype";
+           args=parse_args [];
+           'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+          (* success. *)
+          Ast.Prototype (id, Array.of_list (List.rev args))
+      | [< (prefix, kind)=parse_operator;
+           'Token.Kwd op ?? "expected an operator";
+           (* Read the precedence if present. *)
+           binary_precedence=parse_binary_precedence;
+           'Token.Kwd '(' ?? "expected '(' in prototype";
+            args=parse_args [];
+           'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+          let name = prefix ^ (String.make 1 op) in
+          let args = Array.of_list (List.rev args) in
+
+          (* Verify right number of arguments for operator. *)
+          if Array.length args != kind
+          then raise (Stream.Error "invalid number of operands for operator")
+          else
+            if kind == 1 then
+              Ast.Prototype (name, args)
+            else
+              Ast.BinOpPrototype (name, args, binary_precedence)
+      | [< >] ->
+          raise (Stream.Error "expected function name in prototype")
+
+As with binary operators, we name unary operators with a name that
+includes the operator character. This assists us at code generation
+time. Speaking of, the final piece we need to add is codegen support for
+unary operators. It looks like this:
+
+.. code-block:: ocaml
+
+    let rec codegen_expr = function
+      ...
+      | Ast.Unary (op, operand) ->
+          let operand = codegen_expr operand in
+          let callee = "unary" ^ (String.make 1 op) in
+          let callee =
+            match lookup_function callee the_module with
+            | Some callee -> callee
+            | None -> raise (Error "unknown unary operator")
+          in
+          build_call callee [|operand|] "unop" builder
+
+This code is similar to, but simpler than, the code for binary
+operators. It is simpler primarily because it doesn't need to handle any
+predefined operators.
+
+Kicking the Tires
+=================
+
+It is somewhat hard to believe, but with a few simple extensions we've
+covered in the last chapters, we have grown a real-ish language. With
+this, we can do a lot of interesting things, including I/O, math, and a
+bunch of other things. For example, we can now add a nice sequencing
+operator (printd is defined to print out the specified value and a
+newline):
+
+::
+
+    ready> extern printd(x);
+    Read extern: declare double @printd(double)
+    ready> def binary : 1 (x y) 0;  # Low-precedence operator that ignores operands.
+    ..
+    ready> printd(123) : printd(456) : printd(789);
+    123.000000
+    456.000000
+    789.000000
+    Evaluated to 0.000000
+
+We can also define a bunch of other "primitive" operations, such as:
+
+::
+
+    # Logical unary not.
+    def unary!(v)
+      if v then
+        0
+      else
+        1;
+
+    # Unary negate.
+    def unary-(v)
+      0-v;
+
+    # Define > with the same precedence as <.
+    def binary> 10 (LHS RHS)
+      RHS < LHS;
+
+    # Binary logical or, which does not short circuit.
+    def binary| 5 (LHS RHS)
+      if LHS then
+        1
+      else if RHS then
+        1
+      else
+        0;
+
+    # Binary logical and, which does not short circuit.
+    def binary& 6 (LHS RHS)
+      if !LHS then
+        0
+      else
+        !!RHS;
+
+    # Define = with slightly lower precedence than relationals.
+    def binary = 9 (LHS RHS)
+      !(LHS < RHS | LHS > RHS);
+
+Given the previous if/then/else support, we can also define interesting
+functions for I/O. For example, the following prints out a character
+whose "density" reflects the value passed in: the lower the value, the
+denser the character:
+
+::
+
+    ready>
+
+    extern putchard(char)
+    def printdensity(d)
+      if d > 8 then
+        putchard(32)  # ' '
+      else if d > 4 then
+        putchard(46)  # '.'
+      else if d > 2 then
+        putchard(43)  # '+'
+      else
+        putchard(42); # '*'
+    ...
+    ready> printdensity(1): printdensity(2): printdensity(3) :
+              printdensity(4): printdensity(5): printdensity(9): putchard(10);
+    *++..
+    Evaluated to 0.000000
+
+Based on these simple primitive operations, we can start to define more
+interesting things. For example, here's a little function that solves
+for the number of iterations it takes a function in the complex plane to
+converge:
+
+::
+
+    # determine whether the specific location diverges.
+    # Solve for z = z^2 + c in the complex plane.
+    def mandleconverger(real imag iters creal cimag)
+      if iters > 255 | (real*real + imag*imag > 4) then
+        iters
+      else
+        mandleconverger(real*real - imag*imag + creal,
+                        2*real*imag + cimag,
+                        iters+1, creal, cimag);
+
+    # return the number of iterations required for the iteration to escape
+    def mandleconverge(real imag)
+      mandleconverger(real, imag, 0, real, imag);
+
+This "z = z\ :sup:`2`\  + c" function is a beautiful little creature
+that is the basis for computation of the `Mandelbrot
+Set <http://en.wikipedia.org/wiki/Mandelbrot_set>`_. Our
+``mandelconverge`` function returns the number of iterations that it
+takes for a complex orbit to escape, saturating to 255. This is not a
+very useful function by itself, but if you plot its value over a
+two-dimensional plane, you can see the Mandelbrot set. Given that we are
+limited to using putchard here, our amazing graphical output is limited,
+but we can whip together something using the density plotter above:
+
+::
+
+    # compute and plot the mandlebrot set with the specified 2 dimensional range
+    # info.
+    def mandelhelp(xmin xmax xstep   ymin ymax ystep)
+      for y = ymin, y < ymax, ystep in (
+        (for x = xmin, x < xmax, xstep in
+           printdensity(mandleconverge(x,y)))
+        : putchard(10)
+      )
+
+    # mandel - This is a convenient helper function for plotting the mandelbrot set
+    # from the specified position with the specified Magnification.
+    def mandel(realstart imagstart realmag imagmag)
+      mandelhelp(realstart, realstart+realmag*78, realmag,
+                 imagstart, imagstart+imagmag*40, imagmag);
+
+Given this, we can try plotting out the mandlebrot set! Lets try it out:
+
+::
+
+    ready> mandel(-2.3, -1.3, 0.05, 0.07);
+    *******************************+++++++++++*************************************
+    *************************+++++++++++++++++++++++*******************************
+    **********************+++++++++++++++++++++++++++++****************************
+    *******************+++++++++++++++++++++.. ...++++++++*************************
+    *****************++++++++++++++++++++++.... ...+++++++++***********************
+    ***************+++++++++++++++++++++++.....   ...+++++++++*********************
+    **************+++++++++++++++++++++++....     ....+++++++++********************
+    *************++++++++++++++++++++++......      .....++++++++*******************
+    ************+++++++++++++++++++++.......       .......+++++++******************
+    ***********+++++++++++++++++++....                ... .+++++++*****************
+    **********+++++++++++++++++.......                     .+++++++****************
+    *********++++++++++++++...........                    ...+++++++***************
+    ********++++++++++++............                      ...++++++++**************
+    ********++++++++++... ..........                        .++++++++**************
+    *******+++++++++.....                                   .+++++++++*************
+    *******++++++++......                                  ..+++++++++*************
+    *******++++++.......                                   ..+++++++++*************
+    *******+++++......                                     ..+++++++++*************
+    *******.... ....                                      ...+++++++++*************
+    *******.... .                                         ...+++++++++*************
+    *******+++++......                                    ...+++++++++*************
+    *******++++++.......                                   ..+++++++++*************
+    *******++++++++......                                   .+++++++++*************
+    *******+++++++++.....                                  ..+++++++++*************
+    ********++++++++++... ..........                        .++++++++**************
+    ********++++++++++++............                      ...++++++++**************
+    *********++++++++++++++..........                     ...+++++++***************
+    **********++++++++++++++++........                     .+++++++****************
+    **********++++++++++++++++++++....                ... ..+++++++****************
+    ***********++++++++++++++++++++++.......       .......++++++++*****************
+    ************+++++++++++++++++++++++......      ......++++++++******************
+    **************+++++++++++++++++++++++....      ....++++++++********************
+    ***************+++++++++++++++++++++++.....   ...+++++++++*********************
+    *****************++++++++++++++++++++++....  ...++++++++***********************
+    *******************+++++++++++++++++++++......++++++++*************************
+    *********************++++++++++++++++++++++.++++++++***************************
+    *************************+++++++++++++++++++++++*******************************
+    ******************************+++++++++++++************************************
+    *******************************************************************************
+    *******************************************************************************
+    *******************************************************************************
+    Evaluated to 0.000000
+    ready> mandel(-2, -1, 0.02, 0.04);
+    **************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
+    ***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+    *********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
+    *******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
+    *****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
+    ***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
+    **************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
+    ************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
+    ***********++++++++++++++++++++++++++++++++++++++++++++++++++........        .
+    **********++++++++++++++++++++++++++++++++++++++++++++++.............
+    ********+++++++++++++++++++++++++++++++++++++++++++..................
+    *******+++++++++++++++++++++++++++++++++++++++.......................
+    ******+++++++++++++++++++++++++++++++++++...........................
+    *****++++++++++++++++++++++++++++++++............................
+    *****++++++++++++++++++++++++++++...............................
+    ****++++++++++++++++++++++++++......   .........................
+    ***++++++++++++++++++++++++.........     ......    ...........
+    ***++++++++++++++++++++++............
+    **+++++++++++++++++++++..............
+    **+++++++++++++++++++................
+    *++++++++++++++++++.................
+    *++++++++++++++++............ ...
+    *++++++++++++++..............
+    *+++....++++................
+    *..........  ...........
+    *
+    *..........  ...........
+    *+++....++++................
+    *++++++++++++++..............
+    *++++++++++++++++............ ...
+    *++++++++++++++++++.................
+    **+++++++++++++++++++................
+    **+++++++++++++++++++++..............
+    ***++++++++++++++++++++++............
+    ***++++++++++++++++++++++++.........     ......    ...........
+    ****++++++++++++++++++++++++++......   .........................
+    *****++++++++++++++++++++++++++++...............................
+    *****++++++++++++++++++++++++++++++++............................
+    ******+++++++++++++++++++++++++++++++++++...........................
+    *******+++++++++++++++++++++++++++++++++++++++.......................
+    ********+++++++++++++++++++++++++++++++++++++++++++..................
+    Evaluated to 0.000000
+    ready> mandel(-0.9, -1.4, 0.02, 0.03);
+    *******************************************************************************
+    *******************************************************************************
+    *******************************************************************************
+    **********+++++++++++++++++++++************************************************
+    *+++++++++++++++++++++++++++++++++++++++***************************************
+    +++++++++++++++++++++++++++++++++++++++++++++**********************************
+    ++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
+    ++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
+    +++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
+    +++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
+    +++++++++++++++++++++++++++++++....   ......+++++++++++++++++++****************
+    +++++++++++++++++++++++++++++.......  ........+++++++++++++++++++**************
+    ++++++++++++++++++++++++++++........   ........++++++++++++++++++++************
+    +++++++++++++++++++++++++++.........     ..  ...+++++++++++++++++++++**********
+    ++++++++++++++++++++++++++...........        ....++++++++++++++++++++++********
+    ++++++++++++++++++++++++.............       .......++++++++++++++++++++++******
+    +++++++++++++++++++++++.............        ........+++++++++++++++++++++++****
+    ++++++++++++++++++++++...........           ..........++++++++++++++++++++++***
+    ++++++++++++++++++++...........                .........++++++++++++++++++++++*
+    ++++++++++++++++++............                  ...........++++++++++++++++++++
+    ++++++++++++++++...............                 .............++++++++++++++++++
+    ++++++++++++++.................                 ...............++++++++++++++++
+    ++++++++++++..................                  .................++++++++++++++
+    +++++++++..................                      .................+++++++++++++
+    ++++++........        .                               .........  ..++++++++++++
+    ++............                                         ......    ....++++++++++
+    ..............                                                    ...++++++++++
+    ..............                                                    ....+++++++++
+    ..............                                                    .....++++++++
+    .............                                                    ......++++++++
+    ...........                                                     .......++++++++
+    .........                                                       ........+++++++
+    .........                                                       ........+++++++
+    .........                                                           ....+++++++
+    ........                                                             ...+++++++
+    .......                                                              ...+++++++
+                                                                        ....+++++++
+                                                                       .....+++++++
+                                                                        ....+++++++
+                                                                        ....+++++++
+                                                                        ....+++++++
+    Evaluated to 0.000000
+    ready> ^D
+
+At this point, you may be starting to realize that Kaleidoscope is a
+real and powerful language. It may not be self-similar :), but it can be
+used to plot things that are!
+
+With this, we conclude the "adding user-defined operators" chapter of
+the tutorial. We have successfully augmented our language, adding the
+ability to extend the language in the library, and we have shown how
+this can be used to build a simple but interesting end-user application
+in Kaleidoscope. At this point, Kaleidoscope can build a variety of
+applications that are functional and can call functions with
+side-effects, but it can't actually define and mutate a variable itself.
+
+Strikingly, variable mutation is an important feature of some languages,
+and it is not at all obvious how to `add support for mutable
+variables <OCamlLangImpl7.html>`_ without having to add an "SSA
+construction" phase to your front-end. In the next chapter, we will
+describe how you can add variable mutation without building SSA in your
+front-end.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+the if/then/else and for expressions.. To build this example, use:
+
+.. code-block:: bash
+
+    # Compile
+    ocamlbuild toy.byte
+    # Run
+    ./toy.byte
+
+Here is the code:
+
+\_tags:
+    ::
+
+        <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+        <*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+        <*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+        <*.{byte,native}>: use_llvm_scalar_opts, use_bindings
+
+myocamlbuild.ml:
+    .. code-block:: ocaml
+
+        open Ocamlbuild_plugin;;
+
+        ocaml_lib ~extern:true "llvm";;
+        ocaml_lib ~extern:true "llvm_analysis";;
+        ocaml_lib ~extern:true "llvm_executionengine";;
+        ocaml_lib ~extern:true "llvm_target";;
+        ocaml_lib ~extern:true "llvm_scalar_opts";;
+
+        flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
+        dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
+
+token.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Lexer Tokens
+         *===----------------------------------------------------------------------===*)
+
+        (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
+         * these others for known things. *)
+        type token =
+          (* commands *)
+          | Def | Extern
+
+          (* primary *)
+          | Ident of string | Number of float
+
+          (* unknown *)
+          | Kwd of char
+
+          (* control *)
+          | If | Then | Else
+          | For | In
+
+          (* operators *)
+          | Binary | Unary
+
+lexer.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Lexer
+         *===----------------------------------------------------------------------===*)
+
+        let rec lex = parser
+          (* Skip any whitespace. *)
+          | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+          (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+          | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+              let buffer = Buffer.create 1 in
+              Buffer.add_char buffer c;
+              lex_ident buffer stream
+
+          (* number: [0-9.]+ *)
+          | [< ' ('0' .. '9' as c); stream >] ->
+              let buffer = Buffer.create 1 in
+              Buffer.add_char buffer c;
+              lex_number buffer stream
+
+          (* Comment until end of line. *)
+          | [< ' ('#'); stream >] ->
+              lex_comment stream
+
+          (* Otherwise, just return the character as its ascii value. *)
+          | [< 'c; stream >] ->
+              [< 'Token.Kwd c; lex stream >]
+
+          (* end of stream. *)
+          | [< >] -> [< >]
+
+        and lex_number buffer = parser
+          | [< ' ('0' .. '9' | '.' as c); stream >] ->
+              Buffer.add_char buffer c;
+              lex_number buffer stream
+          | [< stream=lex >] ->
+              [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+        and lex_ident buffer = parser
+          | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+              Buffer.add_char buffer c;
+              lex_ident buffer stream
+          | [< stream=lex >] ->
+              match Buffer.contents buffer with
+              | "def" -> [< 'Token.Def; stream >]
+              | "extern" -> [< 'Token.Extern; stream >]
+              | "if" -> [< 'Token.If; stream >]
+              | "then" -> [< 'Token.Then; stream >]
+              | "else" -> [< 'Token.Else; stream >]
+              | "for" -> [< 'Token.For; stream >]
+              | "in" -> [< 'Token.In; stream >]
+              | "binary" -> [< 'Token.Binary; stream >]
+              | "unary" -> [< 'Token.Unary; stream >]
+              | id -> [< 'Token.Ident id; stream >]
+
+        and lex_comment = parser
+          | [< ' ('\n'); stream=lex >] -> stream
+          | [< 'c; e=lex_comment >] -> e
+          | [< >] -> [< >]
+
+ast.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Abstract Syntax Tree (aka Parse Tree)
+         *===----------------------------------------------------------------------===*)
+
+        (* expr - Base type for all expression nodes. *)
+        type expr =
+          (* variant for numeric literals like "1.0". *)
+          | Number of float
+
+          (* variant for referencing a variable, like "a". *)
+          | Variable of string
+
+          (* variant for a unary operator. *)
+          | Unary of char * expr
+
+          (* variant for a binary operator. *)
+          | Binary of char * expr * expr
+
+          (* variant for function calls. *)
+          | Call of string * expr array
+
+          (* variant for if/then/else. *)
+          | If of expr * expr * expr
+
+          (* variant for for/in. *)
+          | For of string * expr * expr * expr option * expr
+
+        (* proto - This type represents the "prototype" for a function, which captures
+         * its name, and its argument names (thus implicitly the number of arguments the
+         * function takes). *)
+        type proto =
+          | Prototype of string * string array
+          | BinOpPrototype of string * string array * int
+
+        (* func - This type represents a function definition itself. *)
+        type func = Function of proto * expr
+
+parser.ml:
+    .. code-block:: ocaml
+
+        (*===---------------------------------------------------------------------===
+         * Parser
+         *===---------------------------------------------------------------------===*)
+
+        (* binop_precedence - This holds the precedence for each binary operator that is
+         * defined *)
+        let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+        (* precedence - Get the precedence of the pending binary operator token. *)
+        let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+        (* primary
+         *   ::= identifier
+         *   ::= numberexpr
+         *   ::= parenexpr
+         *   ::= ifexpr
+         *   ::= forexpr *)
+        let rec parse_primary = parser
+          (* numberexpr ::= number *)
+          | [< 'Token.Number n >] -> Ast.Number n
+
+          (* parenexpr ::= '(' expression ')' *)
+          | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+          (* identifierexpr
+           *   ::= identifier
+           *   ::= identifier '(' argumentexpr ')' *)
+          | [< 'Token.Ident id; stream >] ->
+              let rec parse_args accumulator = parser
+                | [< e=parse_expr; stream >] ->
+                    begin parser
+                      | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+                      | [< >] -> e :: accumulator
+                    end stream
+                | [< >] -> accumulator
+              in
+              let rec parse_ident id = parser
+                (* Call. *)
+                | [< 'Token.Kwd '(';
+                     args=parse_args [];
+                     'Token.Kwd ')' ?? "expected ')'">] ->
+                    Ast.Call (id, Array.of_list (List.rev args))
+
+                (* Simple variable ref. *)
+                | [< >] -> Ast.Variable id
+              in
+              parse_ident id stream
+
+          (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+          | [< 'Token.If; c=parse_expr;
+               'Token.Then ?? "expected 'then'"; t=parse_expr;
+               'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+              Ast.If (c, t, e)
+
+          (* forexpr
+                ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
+          | [< 'Token.For;
+               'Token.Ident id ?? "expected identifier after for";
+               'Token.Kwd '=' ?? "expected '=' after for";
+               stream >] ->
+              begin parser
+                | [<
+                     start=parse_expr;
+                     'Token.Kwd ',' ?? "expected ',' after for";
+                     end_=parse_expr;
+                     stream >] ->
+                    let step =
+                      begin parser
+                      | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+                      | [< >] -> None
+                      end stream
+                    in
+                    begin parser
+                    | [< 'Token.In; body=parse_expr >] ->
+                        Ast.For (id, start, end_, step, body)
+                    | [< >] ->
+                        raise (Stream.Error "expected 'in' after for")
+                    end stream
+                | [< >] ->
+                    raise (Stream.Error "expected '=' after for")
+              end stream
+
+          | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+        (* unary
+         *   ::= primary
+         *   ::= '!' unary *)
+        and parse_unary = parser
+          (* If this is a unary operator, read it. *)
+          | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] ->
+              Ast.Unary (op, operand)
+
+          (* If the current token is not an operator, it must be a primary expr. *)
+          | [< stream >] -> parse_primary stream
+
+        (* binoprhs
+         *   ::= ('+' primary)* *)
+        and parse_bin_rhs expr_prec lhs stream =
+          match Stream.peek stream with
+          (* If this is a binop, find its precedence. *)
+          | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+              let token_prec = precedence c in
+
+              (* If this is a binop that binds at least as tightly as the current binop,
+               * consume it, otherwise we are done. *)
+              if token_prec < expr_prec then lhs else begin
+                (* Eat the binop. *)
+                Stream.junk stream;
+
+                (* Parse the unary expression after the binary operator. *)
+                let rhs = parse_unary stream in
+
+                (* Okay, we know this is a binop. *)
+                let rhs =
+                  match Stream.peek stream with
+                  | Some (Token.Kwd c2) ->
+                      (* If BinOp binds less tightly with rhs than the operator after
+                       * rhs, let the pending operator take rhs as its lhs. *)
+                      let next_prec = precedence c2 in
+                      if token_prec < next_prec
+                      then parse_bin_rhs (token_prec + 1) rhs stream
+                      else rhs
+                  | _ -> rhs
+                in
+
+                (* Merge lhs/rhs. *)
+                let lhs = Ast.Binary (c, lhs, rhs) in
+                parse_bin_rhs expr_prec lhs stream
+              end
+          | _ -> lhs
+
+        (* expression
+         *   ::= primary binoprhs *)
+        and parse_expr = parser
+          | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
+
+        (* prototype
+         *   ::= id '(' id* ')'
+         *   ::= binary LETTER number? (id, id)
+         *   ::= unary LETTER number? (id) *)
+        let parse_prototype =
+          let rec parse_args accumulator = parser
+            | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+            | [< >] -> accumulator
+          in
+          let parse_operator = parser
+            | [< 'Token.Unary >] -> "unary", 1
+            | [< 'Token.Binary >] -> "binary", 2
+          in
+          let parse_binary_precedence = parser
+            | [< 'Token.Number n >] -> int_of_float n
+            | [< >] -> 30
+          in
+          parser
+          | [< 'Token.Ident id;
+               'Token.Kwd '(' ?? "expected '(' in prototype";
+               args=parse_args [];
+               'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+              (* success. *)
+              Ast.Prototype (id, Array.of_list (List.rev args))
+          | [< (prefix, kind)=parse_operator;
+               'Token.Kwd op ?? "expected an operator";
+               (* Read the precedence if present. *)
+               binary_precedence=parse_binary_precedence;
+               'Token.Kwd '(' ?? "expected '(' in prototype";
+                args=parse_args [];
+               'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+              let name = prefix ^ (String.make 1 op) in
+              let args = Array.of_list (List.rev args) in
+
+              (* Verify right number of arguments for operator. *)
+              if Array.length args != kind
+              then raise (Stream.Error "invalid number of operands for operator")
+              else
+                if kind == 1 then
+                  Ast.Prototype (name, args)
+                else
+                  Ast.BinOpPrototype (name, args, binary_precedence)
+          | [< >] ->
+              raise (Stream.Error "expected function name in prototype")
+
+        (* definition ::= 'def' prototype expression *)
+        let parse_definition = parser
+          | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+              Ast.Function (p, e)
+
+        (* toplevelexpr ::= expression *)
+        let parse_toplevel = parser
+          | [< e=parse_expr >] ->
+              (* Make an anonymous proto. *)
+              Ast.Function (Ast.Prototype ("", [||]), e)
+
+        (*  external ::= 'extern' prototype *)
+        let parse_extern = parser
+          | [< 'Token.Extern; e=parse_prototype >] -> e
+
+codegen.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Code Generation
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+
+        exception Error of string
+
+        let context = global_context ()
+        let the_module = create_module context "my cool jit"
+        let builder = builder context
+        let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+        let double_type = double_type context
+
+        let rec codegen_expr = function
+          | Ast.Number n -> const_float double_type n
+          | Ast.Variable name ->
+              (try Hashtbl.find named_values name with
+                | Not_found -> raise (Error "unknown variable name"))
+          | Ast.Unary (op, operand) ->
+              let operand = codegen_expr operand in
+              let callee = "unary" ^ (String.make 1 op) in
+              let callee =
+                match lookup_function callee the_module with
+                | Some callee -> callee
+                | None -> raise (Error "unknown unary operator")
+              in
+              build_call callee [|operand|] "unop" builder
+          | Ast.Binary (op, lhs, rhs) ->
+              let lhs_val = codegen_expr lhs in
+              let rhs_val = codegen_expr rhs in
+              begin
+                match op with
+                | '+' -> build_add lhs_val rhs_val "addtmp" builder
+                | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+                | '*' -> build_mul lhs_val rhs_val "multmp" builder
+                | '<' ->
+                    (* Convert bool 0/1 to double 0.0 or 1.0 *)
+                    let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+                    build_uitofp i double_type "booltmp" builder
+                | _ ->
+                    (* If it wasn't a builtin binary operator, it must be a user defined
+                     * one. Emit a call to it. *)
+                    let callee = "binary" ^ (String.make 1 op) in
+                    let callee =
+                      match lookup_function callee the_module with
+                      | Some callee -> callee
+                      | None -> raise (Error "binary operator not found!")
+                    in
+                    build_call callee [|lhs_val; rhs_val|] "binop" builder
+              end
+          | Ast.Call (callee, args) ->
+              (* Look up the name in the module table. *)
+              let callee =
+                match lookup_function callee the_module with
+                | Some callee -> callee
+                | None -> raise (Error "unknown function referenced")
+              in
+              let params = params callee in
+
+              (* If argument mismatch error. *)
+              if Array.length params == Array.length args then () else
+                raise (Error "incorrect # arguments passed");
+              let args = Array.map codegen_expr args in
+              build_call callee args "calltmp" builder
+          | Ast.If (cond, then_, else_) ->
+              let cond = codegen_expr cond in
+
+              (* Convert condition to a bool by comparing equal to 0.0 *)
+              let zero = const_float double_type 0.0 in
+              let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
+
+              (* Grab the first block so that we might later add the conditional branch
+               * to it at the end of the function. *)
+              let start_bb = insertion_block builder in
+              let the_function = block_parent start_bb in
+
+              let then_bb = append_block context "then" the_function in
+
+              (* Emit 'then' value. *)
+              position_at_end then_bb builder;
+              let then_val = codegen_expr then_ in
+
+              (* Codegen of 'then' can change the current block, update then_bb for the
+               * phi. We create a new name because one is used for the phi node, and the
+               * other is used for the conditional branch. *)
+              let new_then_bb = insertion_block builder in
+
+              (* Emit 'else' value. *)
+              let else_bb = append_block context "else" the_function in
+              position_at_end else_bb builder;
+              let else_val = codegen_expr else_ in
+
+              (* Codegen of 'else' can change the current block, update else_bb for the
+               * phi. *)
+              let new_else_bb = insertion_block builder in
+
+              (* Emit merge block. *)
+              let merge_bb = append_block context "ifcont" the_function in
+              position_at_end merge_bb builder;
+              let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
+              let phi = build_phi incoming "iftmp" builder in
+
+              (* Return to the start block to add the conditional branch. *)
+              position_at_end start_bb builder;
+              ignore (build_cond_br cond_val then_bb else_bb builder);
+
+              (* Set a unconditional branch at the end of the 'then' block and the
+               * 'else' block to the 'merge' block. *)
+              position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
+              position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
+
+              (* Finally, set the builder to the end of the merge block. *)
+              position_at_end merge_bb builder;
+
+              phi
+          | Ast.For (var_name, start, end_, step, body) ->
+              (* Emit the start code first, without 'variable' in scope. *)
+              let start_val = codegen_expr start in
+
+              (* Make the new basic block for the loop header, inserting after current
+               * block. *)
+              let preheader_bb = insertion_block builder in
+              let the_function = block_parent preheader_bb in
+              let loop_bb = append_block context "loop" the_function in
+
+              (* Insert an explicit fall through from the current block to the
+               * loop_bb. *)
+              ignore (build_br loop_bb builder);
+
+              (* Start insertion in loop_bb. *)
+              position_at_end loop_bb builder;
+
+              (* Start the PHI node with an entry for start. *)
+              let variable = build_phi [(start_val, preheader_bb)] var_name builder in
+
+              (* Within the loop, the variable is defined equal to the PHI node. If it
+               * shadows an existing variable, we have to restore it, so save it
+               * now. *)
+              let old_val =
+                try Some (Hashtbl.find named_values var_name) with Not_found -> None
+              in
+              Hashtbl.add named_values var_name variable;
+
+              (* Emit the body of the loop.  This, like any other expr, can change the
+               * current BB.  Note that we ignore the value computed by the body, but
+               * don't allow an error *)
+              ignore (codegen_expr body);
+
+              (* Emit the step value. *)
+              let step_val =
+                match step with
+                | Some step -> codegen_expr step
+                (* If not specified, use 1.0. *)
+                | None -> const_float double_type 1.0
+              in
+
+              let next_var = build_add variable step_val "nextvar" builder in
+
+              (* Compute the end condition. *)
+              let end_cond = codegen_expr end_ in
+
+              (* Convert condition to a bool by comparing equal to 0.0. *)
+              let zero = const_float double_type 0.0 in
+              let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
+
+              (* Create the "after loop" block and insert it. *)
+              let loop_end_bb = insertion_block builder in
+              let after_bb = append_block context "afterloop" the_function in
+
+              (* Insert the conditional branch into the end of loop_end_bb. *)
+              ignore (build_cond_br end_cond loop_bb after_bb builder);
+
+              (* Any new code will be inserted in after_bb. *)
+              position_at_end after_bb builder;
+
+              (* Add a new entry to the PHI node for the backedge. *)
+              add_incoming (next_var, loop_end_bb) variable;
+
+              (* Restore the unshadowed variable. *)
+              begin match old_val with
+              | Some old_val -> Hashtbl.add named_values var_name old_val
+              | None -> ()
+              end;
+
+              (* for expr always returns 0.0. *)
+              const_null double_type
+
+        let codegen_proto = function
+          | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) ->
+              (* Make the function type: double(double,double) etc. *)
+              let doubles = Array.make (Array.length args) double_type in
+              let ft = function_type double_type doubles in
+              let f =
+                match lookup_function name the_module with
+                | None -> declare_function name ft the_module
+
+                (* If 'f' conflicted, there was already something named 'name'. If it
+                 * has a body, don't allow redefinition or reextern. *)
+                | Some f ->
+                    (* If 'f' already has a body, reject this. *)
+                    if block_begin f <> At_end f then
+                      raise (Error "redefinition of function");
+
+                    (* If 'f' took a different number of arguments, reject. *)
+                    if element_type (type_of f) <> ft then
+                      raise (Error "redefinition of function with different # args");
+                    f
+              in
+
+              (* Set names for all arguments. *)
+              Array.iteri (fun i a ->
+                let n = args.(i) in
+                set_value_name n a;
+                Hashtbl.add named_values n a;
+              ) (params f);
+              f
+
+        let codegen_func the_fpm = function
+          | Ast.Function (proto, body) ->
+              Hashtbl.clear named_values;
+              let the_function = codegen_proto proto in
+
+              (* If this is an operator, install it. *)
+              begin match proto with
+              | Ast.BinOpPrototype (name, args, prec) ->
+                  let op = name.[String.length name - 1] in
+                  Hashtbl.add Parser.binop_precedence op prec;
+              | _ -> ()
+              end;
+
+              (* Create a new basic block to start insertion into. *)
+              let bb = append_block context "entry" the_function in
+              position_at_end bb builder;
+
+              try
+                let ret_val = codegen_expr body in
+
+                (* Finish off the function. *)
+                let _ = build_ret ret_val builder in
+
+                (* Validate the generated code, checking for consistency. *)
+                Llvm_analysis.assert_valid_function the_function;
+
+                (* Optimize the function. *)
+                let _ = PassManager.run_function the_function the_fpm in
+
+                the_function
+              with e ->
+                delete_function the_function;
+                raise e
+
+toplevel.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Top-Level parsing and JIT Driver
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+        open Llvm_executionengine
+
+        (* top ::= definition | external | expression | ';' *)
+        let rec main_loop the_fpm the_execution_engine stream =
+          match Stream.peek stream with
+          | None -> ()
+
+          (* ignore top-level semicolons. *)
+          | Some (Token.Kwd ';') ->
+              Stream.junk stream;
+              main_loop the_fpm the_execution_engine stream
+
+          | Some token ->
+              begin
+                try match token with
+                | Token.Def ->
+                    let e = Parser.parse_definition stream in
+                    print_endline "parsed a function definition.";
+                    dump_value (Codegen.codegen_func the_fpm e);
+                | Token.Extern ->
+                    let e = Parser.parse_extern stream in
+                    print_endline "parsed an extern.";
+                    dump_value (Codegen.codegen_proto e);
+                | _ ->
+                    (* Evaluate a top-level expression into an anonymous function. *)
+                    let e = Parser.parse_toplevel stream in
+                    print_endline "parsed a top-level expr";
+                    let the_function = Codegen.codegen_func the_fpm e in
+                    dump_value the_function;
+
+                    (* JIT the function, returning a function pointer. *)
+                    let result = ExecutionEngine.run_function the_function [||]
+                      the_execution_engine in
+
+                    print_string "Evaluated to ";
+                    print_float (GenericValue.as_float Codegen.double_type result);
+                    print_newline ();
+                with Stream.Error s | Codegen.Error s ->
+                  (* Skip token for error recovery. *)
+                  Stream.junk stream;
+                  print_endline s;
+              end;
+              print_string "ready> "; flush stdout;
+              main_loop the_fpm the_execution_engine stream
+
+toy.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Main driver code.
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+        open Llvm_executionengine
+        open Llvm_target
+        open Llvm_scalar_opts
+
+        let main () =
+          ignore (initialize_native_target ());
+
+          (* Install standard binary operators.
+           * 1 is the lowest precedence. *)
+          Hashtbl.add Parser.binop_precedence '<' 10;
+          Hashtbl.add Parser.binop_precedence '+' 20;
+          Hashtbl.add Parser.binop_precedence '-' 20;
+          Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+
+          (* Prime the first token. *)
+          print_string "ready> "; flush stdout;
+          let stream = Lexer.lex (Stream.of_channel stdin) in
+
+          (* Create the JIT. *)
+          let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+          let the_fpm = PassManager.create_function Codegen.the_module in
+
+          (* Set up the optimizer pipeline.  Start with registering info about how the
+           * target lays out data structures. *)
+          DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+          (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+          add_instruction_combination the_fpm;
+
+          (* reassociate expressions. *)
+          add_reassociation the_fpm;
+
+          (* Eliminate Common SubExpressions. *)
+          add_gvn the_fpm;
+
+          (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+          add_cfg_simplification the_fpm;
+
+          ignore (PassManager.initialize the_fpm);
+
+          (* Run the main "interpreter loop" now. *)
+          Toplevel.main_loop the_fpm the_execution_engine stream;
+
+          (* Print out all the generated code. *)
+          dump_module Codegen.the_module
+        ;;
+
+        main ()
+
+bindings.c
+    .. code-block:: c
+
+        #include <stdio.h>
+
+        /* putchard - putchar that takes a double and returns 0. */
+        extern double putchard(double X) {
+          putchar((char)X);
+          return 0;
+        }
+
+        /* printd - printf that takes a double prints it as "%f\n", returning 0. */
+        extern double printd(double X) {
+          printf("%f\n", X);
+          return 0;
+        }
+
+`Next: Extending the language: mutable variables / SSA
+construction <OCamlLangImpl7.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl7.html b/docs/tutorial/OCamlLangImpl7.html
deleted file mode 100644
index fd66b10c1a..0000000000
--- a/docs/tutorial/OCamlLangImpl7.html
+++ /dev/null
@@ -1,1904 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Extending the Language: Mutable Variables / SSA
-         construction</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <meta name="author" content="Erick Tryzelaar">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Extending the Language: Mutable Variables</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 7
-  <ol>
-    <li><a href="#intro">Chapter 7 Introduction</a></li>
-    <li><a href="#why">Why is this a hard problem?</a></li>
-    <li><a href="#memory">Memory in LLVM</a></li>
-    <li><a href="#kalvars">Mutable Variables in Kaleidoscope</a></li>
-    <li><a href="#adjustments">Adjusting Existing Variables for
-     Mutation</a></li>
-    <li><a href="#assignment">New Assignment Operator</a></li>
-    <li><a href="#localvars">User-defined Local Variables</a></li>
-    <li><a href="#code">Full Code Listing</a></li>
-  </ol>
-</li>
-<li><a href="OCamlLangImpl8.html">Chapter 8</a>: Conclusion and other useful LLVM
- tidbits</li>
-</ul>
-
-<div class="doc_author">
-	<p>
-		Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a>
-		and <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a>
-	</p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="intro">Chapter 7 Introduction</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to Chapter 7 of the "<a href="index.html">Implementing a language
-with LLVM</a>" tutorial.  In chapters 1 through 6, we've built a very
-respectable, albeit simple, <a
-href="http://en.wikipedia.org/wiki/Functional_programming">functional
-programming language</a>.  In our journey, we learned some parsing techniques,
-how to build and represent an AST, how to build LLVM IR, and how to optimize
-the resultant code as well as JIT compile it.</p>
-
-<p>While Kaleidoscope is interesting as a functional language, the fact that it
-is functional makes it "too easy" to generate LLVM IR for it.  In particular, a
-functional language makes it very easy to build LLVM IR directly in <a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">SSA form</a>.
-Since LLVM requires that the input code be in SSA form, this is a very nice
-property and it is often unclear to newcomers how to generate code for an
-imperative language with mutable variables.</p>
-
-<p>The short (and happy) summary of this chapter is that there is no need for
-your front-end to build SSA form: LLVM provides highly tuned and well tested
-support for this, though the way it works is a bit unexpected for some.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="why">Why is this a hard problem?</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-To understand why mutable variables cause complexities in SSA construction,
-consider this extremely simple C example:
-</p>
-
-<div class="doc_code">
-<pre>
-int G, H;
-int test(_Bool Condition) {
-  int X;
-  if (Condition)
-    X = G;
-  else
-    X = H;
-  return X;
-}
-</pre>
-</div>
-
-<p>In this case, we have the variable "X", whose value depends on the path
-executed in the program.  Because there are two different possible values for X
-before the return instruction, a PHI node is inserted to merge the two values.
-The LLVM IR that we want for this example looks like this:</p>
-
-<div class="doc_code">
-<pre>
-@G = weak global i32 0   ; type of @G is i32*
-@H = weak global i32 0   ; type of @H is i32*
-
-define i32 @test(i1 %Condition) {
-entry:
-  br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
-  %X.0 = load i32* @G
-  br label %cond_next
-
-cond_false:
-  %X.1 = load i32* @H
-  br label %cond_next
-
-cond_next:
-  %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
-  ret i32 %X.2
-}
-</pre>
-</div>
-
-<p>In this example, the loads from the G and H global variables are explicit in
-the LLVM IR, and they live in the then/else branches of the if statement
-(cond_true/cond_false).  In order to merge the incoming values, the X.2 phi node
-in the cond_next block selects the right value to use based on where control
-flow is coming from: if control flow comes from the cond_false block, X.2 gets
-the value of X.1.  Alternatively, if control flow comes from cond_true, it gets
-the value of X.0.  The intent of this chapter is not to explain the details of
-SSA form.  For more information, see one of the many <a
-href="http://en.wikipedia.org/wiki/Static_single_assignment_form">online
-references</a>.</p>
-
-<p>The question for this article is "who places the phi nodes when lowering
-assignments to mutable variables?".  The issue here is that LLVM
-<em>requires</em> that its IR be in SSA form: there is no "non-ssa" mode for it.
-However, SSA construction requires non-trivial algorithms and data structures,
-so it is inconvenient and wasteful for every front-end to have to reproduce this
-logic.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="memory">Memory in LLVM</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>The 'trick' here is that while LLVM does require all register values to be
-in SSA form, it does not require (or permit) memory objects to be in SSA form.
-In the example above, note that the loads from G and H are direct accesses to
-G and H: they are not renamed or versioned.  This differs from some other
-compiler systems, which do try to version memory objects.  In LLVM, instead of
-encoding dataflow analysis of memory into the LLVM IR, it is handled with <a
-href="../WritingAnLLVMPass.html">Analysis Passes</a> which are computed on
-demand.</p>
-
-<p>
-With this in mind, the high-level idea is that we want to make a stack variable
-(which lives in memory, because it is on the stack) for each mutable object in
-a function.  To take advantage of this trick, we need to talk about how LLVM
-represents stack variables.
-</p>
-
-<p>In LLVM, all memory accesses are explicit with load/store instructions, and
-it is carefully designed not to have (or need) an "address-of" operator.  Notice
-how the type of the @G/@H global variables is actually "i32*" even though the
-variable is defined as "i32".  What this means is that @G defines <em>space</em>
-for an i32 in the global data area, but its <em>name</em> actually refers to the
-address for that space.  Stack variables work the same way, except that instead of
-being declared with global variable definitions, they are declared with the
-<a href="../LangRef.html#i_alloca">LLVM alloca instruction</a>:</p>
-
-<div class="doc_code">
-<pre>
-define i32 @example() {
-entry:
-  %X = alloca i32           ; type of %X is i32*.
-  ...
-  %tmp = load i32* %X       ; load the stack value %X from the stack.
-  %tmp2 = add i32 %tmp, 1   ; increment it
-  store i32 %tmp2, i32* %X  ; store it back
-  ...
-</pre>
-</div>
-
-<p>This code shows an example of how you can declare and manipulate a stack
-variable in the LLVM IR.  Stack memory allocated with the alloca instruction is
-fully general: you can pass the address of the stack slot to functions, you can
-store it in other variables, etc.  In our example above, we could rewrite the
-example to use the alloca technique to avoid using a PHI node:</p>
-
-<div class="doc_code">
-<pre>
-@G = weak global i32 0   ; type of @G is i32*
-@H = weak global i32 0   ; type of @H is i32*
-
-define i32 @test(i1 %Condition) {
-entry:
-  %X = alloca i32           ; type of %X is i32*.
-  br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
-  %X.0 = load i32* @G
-        store i32 %X.0, i32* %X   ; Update X
-  br label %cond_next
-
-cond_false:
-  %X.1 = load i32* @H
-        store i32 %X.1, i32* %X   ; Update X
-  br label %cond_next
-
-cond_next:
-  %X.2 = load i32* %X       ; Read X
-  ret i32 %X.2
-}
-</pre>
-</div>
-
-<p>With this, we have discovered a way to handle arbitrary mutable variables
-without the need to create Phi nodes at all:</p>
-
-<ol>
-<li>Each mutable variable becomes a stack allocation.</li>
-<li>Each read of the variable becomes a load from the stack.</li>
-<li>Each update of the variable becomes a store to the stack.</li>
-<li>Taking the address of a variable just uses the stack address directly.</li>
-</ol>
-
-<p>While this solution has solved our immediate problem, it introduced another
-one: we have now apparently introduced a lot of stack traffic for very simple
-and common operations, a major performance problem.  Fortunately for us, the
-LLVM optimizer has a highly-tuned optimization pass named "mem2reg" that handles
-this case, promoting allocas like this into SSA registers, inserting Phi nodes
-as appropriate.  If you run this example through the pass, for example, you'll
-get:</p>
-
-<div class="doc_code">
-<pre>
-$ <b>llvm-as &lt; example.ll | opt -mem2reg | llvm-dis</b>
-@G = weak global i32 0
-@H = weak global i32 0
-
-define i32 @test(i1 %Condition) {
-entry:
-  br i1 %Condition, label %cond_true, label %cond_false
-
-cond_true:
-  %X.0 = load i32* @G
-  br label %cond_next
-
-cond_false:
-  %X.1 = load i32* @H
-  br label %cond_next
-
-cond_next:
-  %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
-  ret i32 %X.01
-}
-</pre>
-</div>
-
-<p>The mem2reg pass implements the standard "iterated dominance frontier"
-algorithm for constructing SSA form and has a number of optimizations that speed
-up (very common) degenerate cases. The mem2reg optimization pass is the answer
-to dealing with mutable variables, and we highly recommend that you depend on
-it.  Note that mem2reg only works on variables in certain circumstances:</p>
-
-<ol>
-<li>mem2reg is alloca-driven: it looks for allocas and if it can handle them, it
-promotes them.  It does not apply to global variables or heap allocations.</li>
-
-<li>mem2reg only looks for alloca instructions in the entry block of the
-function.  Being in the entry block guarantees that the alloca is only executed
-once, which makes analysis simpler.</li>
-
-<li>mem2reg only promotes allocas whose uses are direct loads and stores.  If
-the address of the stack object is passed to a function, or if any funny pointer
-arithmetic is involved, the alloca will not be promoted.</li>
-
-<li>mem2reg only works on allocas of <a
-href="../LangRef.html#t_classifications">first class</a>
-values (such as pointers, scalars and vectors), and only if the array size
-of the allocation is 1 (or missing in the .ll file).  mem2reg is not capable of
-promoting structs or arrays to registers.  Note that the "scalarrepl" pass is
-more powerful and can promote structs, "unions", and arrays in many cases.</li>
-
-</ol>
-
-<p>
-All of these properties are easy to satisfy for most imperative languages, and
-we'll illustrate it below with Kaleidoscope.  The final question you may be
-asking is: should I bother with this nonsense for my front-end?  Wouldn't it be
-better if I just did SSA construction directly, avoiding use of the mem2reg
-optimization pass?  In short, we strongly recommend that you use this technique
-for building SSA form, unless there is an extremely good reason not to.  Using
-this technique is:</p>
-
-<ul>
-<li>Proven and well tested: llvm-gcc and clang both use this technique for local
-mutable variables.  As such, the most common clients of LLVM are using this to
-handle a bulk of their variables.  You can be sure that bugs are found fast and
-fixed early.</li>
-
-<li>Extremely Fast: mem2reg has a number of special cases that make it fast in
-common cases as well as fully general.  For example, it has fast-paths for
-variables that are only used in a single block, variables that only have one
-assignment point, good heuristics to avoid insertion of unneeded phi nodes, etc.
-</li>
-
-<li>Needed for debug info generation: <a href="../SourceLevelDebugging.html">
-Debug information in LLVM</a> relies on having the address of the variable
-exposed so that debug info can be attached to it.  This technique dovetails
-very naturally with this style of debug info.</li>
-</ul>
-
-<p>If nothing else, this makes it much easier to get your front-end up and
-running, and is very simple to implement.  Lets extend Kaleidoscope with mutable
-variables now!
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="kalvars">Mutable Variables in Kaleidoscope</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Now that we know the sort of problem we want to tackle, lets see what this
-looks like in the context of our little Kaleidoscope language.  We're going to
-add two features:</p>
-
-<ol>
-<li>The ability to mutate variables with the '=' operator.</li>
-<li>The ability to define new variables.</li>
-</ol>
-
-<p>While the first item is really what this is about, we only have variables
-for incoming arguments as well as for induction variables, and redefining those only
-goes so far :).  Also, the ability to define new variables is a
-useful thing regardless of whether you will be mutating them.  Here's a
-motivating example that shows how we could use these:</p>
-
-<div class="doc_code">
-<pre>
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-
-# Recursive fib, we could do this before.
-def fib(x)
-  if (x &lt; 3) then
-    1
-  else
-    fib(x-1)+fib(x-2);
-
-# Iterative fib.
-def fibi(x)
-  <b>var a = 1, b = 1, c in</b>
-  (for i = 3, i &lt; x in
-     <b>c = a + b</b> :
-     <b>a = b</b> :
-     <b>b = c</b>) :
-  b;
-
-# Call it.
-fibi(10);
-</pre>
-</div>
-
-<p>
-In order to mutate variables, we have to change our existing variables to use
-the "alloca trick".  Once we have that, we'll add our new operator, then extend
-Kaleidoscope to support new variable definitions.
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="adjustments">Adjusting Existing Variables for Mutation</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-The symbol table in Kaleidoscope is managed at code generation time by the
-'<tt>named_values</tt>' map.  This map currently keeps track of the LLVM
-"Value*" that holds the double value for the named variable.  In order to
-support mutation, we need to change this slightly, so that it
-<tt>named_values</tt> holds the <em>memory location</em> of the variable in
-question.  Note that this change is a refactoring: it changes the structure of
-the code, but does not (by itself) change the behavior of the compiler.  All of
-these changes are isolated in the Kaleidoscope code generator.</p>
-
-<p>
-At this point in Kaleidoscope's development, it only supports variables for two
-things: incoming arguments to functions and the induction variable of 'for'
-loops.  For consistency, we'll allow mutation of these variables in addition to
-other user-defined variables.  This means that these will both need memory
-locations.
-</p>
-
-<p>To start our transformation of Kaleidoscope, we'll change the
-<tt>named_values</tt> map so that it maps to AllocaInst* instead of Value*.
-Once we do this, the C++ compiler will tell us what parts of the code we need to
-update:</p>
-
-<p><b>Note:</b> the ocaml bindings currently model both <tt>Value*</tt>s and
-<tt>AllocInst*</tt>s as <tt>Llvm.llvalue</tt>s, but this may change in the
-future to be more type safe.</p>
-
-<div class="doc_code">
-<pre>
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-</pre>
-</div>
-
-<p>Also, since we will need to create these alloca's, we'll use a helper
-function that ensures that the allocas are created in the entry block of the
-function:</p>
-
-<div class="doc_code">
-<pre>
-(* Create an alloca instruction in the entry block of the function. This
- * is used for mutable variables etc. *)
-let create_entry_block_alloca the_function var_name =
-  let builder = builder_at (instr_begin (entry_block the_function)) in
-  build_alloca double_type var_name builder
-</pre>
-</div>
-
-<p>This funny looking code creates an <tt>Llvm.llbuilder</tt> object that is
-pointing at the first instruction of the entry block.  It then creates an alloca
-with the expected name and returns it.  Because all values in Kaleidoscope are
-doubles, there is no need to pass in a type to use.</p>
-
-<p>With this in place, the first functionality change we want to make is to
-variable references.  In our new scheme, variables live on the stack, so code
-generating a reference to them actually needs to produce a load from the stack
-slot:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
-  ...
-  | Ast.Variable name -&gt;
-      let v = try Hashtbl.find named_values name with
-        | Not_found -&gt; raise (Error "unknown variable name")
-      in
-      <b>(* Load the value. *)
-      build_load v name builder</b>
-</pre>
-</div>
-
-<p>As you can see, this is pretty straightforward.  Now we need to update the
-things that define the variables to set up the alloca.  We'll start with
-<tt>codegen_expr Ast.For ...</tt> (see the <a href="#code">full code listing</a>
-for the unabridged code):</p>
-
-<div class="doc_code">
-<pre>
-  | Ast.For (var_name, start, end_, step, body) -&gt;
-      let the_function = block_parent (insertion_block builder) in
-
-      (* Create an alloca for the variable in the entry block. *)
-      <b>let alloca = create_entry_block_alloca the_function var_name in</b>
-
-      (* Emit the start code first, without 'variable' in scope. *)
-      let start_val = codegen_expr start in
-
-      <b>(* Store the value into the alloca. *)
-      ignore(build_store start_val alloca builder);</b>
-
-      ...
-
-      (* Within the loop, the variable is defined equal to the PHI node. If it
-       * shadows an existing variable, we have to restore it, so save it
-       * now. *)
-      let old_val =
-        try Some (Hashtbl.find named_values var_name) with Not_found -&gt; None
-      in
-      <b>Hashtbl.add named_values var_name alloca;</b>
-
-      ...
-
-      (* Compute the end condition. *)
-      let end_cond = codegen_expr end_ in
-
-      <b>(* Reload, increment, and restore the alloca. This handles the case where
-       * the body of the loop mutates the variable. *)
-      let cur_var = build_load alloca var_name builder in
-      let next_var = build_add cur_var step_val "nextvar" builder in
-      ignore(build_store next_var alloca builder);</b>
-      ...
-</pre>
-</div>
-
-<p>This code is virtually identical to the code <a
-href="OCamlLangImpl5.html#forcodegen">before we allowed mutable variables</a>.
-The big difference is that we no longer have to construct a PHI node, and we use
-load/store to access the variable as needed.</p>
-
-<p>To support mutable argument variables, we need to also make allocas for them.
-The code for this is also pretty simple:</p>
-
-<div class="doc_code">
-<pre>
-(* Create an alloca for each argument and register the argument in the symbol
- * table so that references to it will succeed. *)
-let create_argument_allocas the_function proto =
-  let args = match proto with
-    | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -&gt; args
-  in
-  Array.iteri (fun i ai -&gt;
-    let var_name = args.(i) in
-    (* Create an alloca for this variable. *)
-    let alloca = create_entry_block_alloca the_function var_name in
-
-    (* Store the initial value into the alloca. *)
-    ignore(build_store ai alloca builder);
-
-    (* Add arguments to variable symbol table. *)
-    Hashtbl.add named_values var_name alloca;
-  ) (params the_function)
-</pre>
-</div>
-
-<p>For each argument, we make an alloca, store the input value to the function
-into the alloca, and register the alloca as the memory location for the
-argument.  This method gets invoked by <tt>Codegen.codegen_func</tt> right after
-it sets up the entry block for the function.</p>
-
-<p>The final missing piece is adding the mem2reg pass, which allows us to get
-good codegen once again:</p>
-
-<div class="doc_code">
-<pre>
-let main () =
-  ...
-  let the_fpm = PassManager.create_function Codegen.the_module in
-
-  (* Set up the optimizer pipeline.  Start with registering info about how the
-   * target lays out data structures. *)
-  DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
-  <b>(* Promote allocas to registers. *)
-  add_memory_to_register_promotion the_fpm;</b>
-
-  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
-  add_instruction_combining the_fpm;
-
-  (* reassociate expressions. *)
-  add_reassociation the_fpm;
-</pre>
-</div>
-
-<p>It is interesting to see what the code looks like before and after the
-mem2reg optimization runs.  For example, this is the before/after code for our
-recursive fib function.  Before the optimization:</p>
-
-<div class="doc_code">
-<pre>
-define double @fib(double %x) {
-entry:
-  <b>%x1 = alloca double
-  store double %x, double* %x1
-  %x2 = load double* %x1</b>
-  %cmptmp = fcmp ult double %x2, 3.000000e+00
-  %booltmp = uitofp i1 %cmptmp to double
-  %ifcond = fcmp one double %booltmp, 0.000000e+00
-  br i1 %ifcond, label %then, label %else
-
-then:    ; preds = %entry
-  br label %ifcont
-
-else:    ; preds = %entry
-  <b>%x3 = load double* %x1</b>
-  %subtmp = fsub double %x3, 1.000000e+00
-  %calltmp = call double @fib(double %subtmp)
-  <b>%x4 = load double* %x1</b>
-  %subtmp5 = fsub double %x4, 2.000000e+00
-  %calltmp6 = call double @fib(double %subtmp5)
-  %addtmp = fadd double %calltmp, %calltmp6
-  br label %ifcont
-
-ifcont:    ; preds = %else, %then
-  %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
-  ret double %iftmp
-}
-</pre>
-</div>
-
-<p>Here there is only one variable (x, the input argument) but you can still
-see the extremely simple-minded code generation strategy we are using.  In the
-entry block, an alloca is created, and the initial input value is stored into
-it.  Each reference to the variable does a reload from the stack.  Also, note
-that we didn't modify the if/then/else expression, so it still inserts a PHI
-node.  While we could make an alloca for it, it is actually easier to create a
-PHI node for it, so we still just make the PHI.</p>
-
-<p>Here is the code after the mem2reg pass runs:</p>
-
-<div class="doc_code">
-<pre>
-define double @fib(double %x) {
-entry:
-  %cmptmp = fcmp ult double <b>%x</b>, 3.000000e+00
-  %booltmp = uitofp i1 %cmptmp to double
-  %ifcond = fcmp one double %booltmp, 0.000000e+00
-  br i1 %ifcond, label %then, label %else
-
-then:
-  br label %ifcont
-
-else:
-  %subtmp = fsub double <b>%x</b>, 1.000000e+00
-  %calltmp = call double @fib(double %subtmp)
-  %subtmp5 = fsub double <b>%x</b>, 2.000000e+00
-  %calltmp6 = call double @fib(double %subtmp5)
-  %addtmp = fadd double %calltmp, %calltmp6
-  br label %ifcont
-
-ifcont:    ; preds = %else, %then
-  %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
-  ret double %iftmp
-}
-</pre>
-</div>
-
-<p>This is a trivial case for mem2reg, since there are no redefinitions of the
-variable.  The point of showing this is to calm your tension about inserting
-such blatent inefficiencies :).</p>
-
-<p>After the rest of the optimizers run, we get:</p>
-
-<div class="doc_code">
-<pre>
-define double @fib(double %x) {
-entry:
-  %cmptmp = fcmp ult double %x, 3.000000e+00
-  %booltmp = uitofp i1 %cmptmp to double
-  %ifcond = fcmp ueq double %booltmp, 0.000000e+00
-  br i1 %ifcond, label %else, label %ifcont
-
-else:
-  %subtmp = fsub double %x, 1.000000e+00
-  %calltmp = call double @fib(double %subtmp)
-  %subtmp5 = fsub double %x, 2.000000e+00
-  %calltmp6 = call double @fib(double %subtmp5)
-  %addtmp = fadd double %calltmp, %calltmp6
-  ret double %addtmp
-
-ifcont:
-  ret double 1.000000e+00
-}
-</pre>
-</div>
-
-<p>Here we see that the simplifycfg pass decided to clone the return instruction
-into the end of the 'else' block.  This allowed it to eliminate some branches
-and the PHI node.</p>
-
-<p>Now that all symbol table references are updated to use stack variables,
-we'll add the assignment operator.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="assignment">New Assignment Operator</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>With our current framework, adding a new assignment operator is really
-simple.  We will parse it just like any other binary operator, but handle it
-internally (instead of allowing the user to define it).  The first step is to
-set a precedence:</p>
-
-<div class="doc_code">
-<pre>
-let main () =
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  <b>Hashtbl.add Parser.binop_precedence '=' 2;</b>
-  Hashtbl.add Parser.binop_precedence '&lt;' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  ...
-</pre>
-</div>
-
-<p>Now that the parser knows the precedence of the binary operator, it takes
-care of all the parsing and AST generation.  We just need to implement codegen
-for the assignment operator.  This looks like:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
-      begin match op with
-      | '=' -&gt;
-          (* Special case '=' because we don't want to emit the LHS as an
-           * expression. *)
-          let name =
-            match lhs with
-            | Ast.Variable name -&gt; name
-            | _ -&gt; raise (Error "destination of '=' must be a variable")
-          in
-</pre>
-</div>
-
-<p>Unlike the rest of the binary operators, our assignment operator doesn't
-follow the "emit LHS, emit RHS, do computation" model.  As such, it is handled
-as a special case before the other binary operators are handled.  The other
-strange thing is that it requires the LHS to be a variable.  It is invalid to
-have "(x+1) = expr" - only things like "x = expr" are allowed.
-</p>
-
-
-<div class="doc_code">
-<pre>
-          (* Codegen the rhs. *)
-          let val_ = codegen_expr rhs in
-
-          (* Lookup the name. *)
-          let variable = try Hashtbl.find named_values name with
-          | Not_found -&gt; raise (Error "unknown variable name")
-          in
-          ignore(build_store val_ variable builder);
-          val_
-      | _ -&gt;
-			...
-</pre>
-</div>
-
-<p>Once we have the variable, codegen'ing the assignment is straightforward:
-we emit the RHS of the assignment, create a store, and return the computed
-value.  Returning a value allows for chained assignments like "X = (Y = Z)".</p>
-
-<p>Now that we have an assignment operator, we can mutate loop variables and
-arguments.  For example, we can now run code like this:</p>
-
-<div class="doc_code">
-<pre>
-# Function to print a double.
-extern printd(x);
-
-# Define ':' for sequencing: as a low-precedence operator that ignores operands
-# and just returns the RHS.
-def binary : 1 (x y) y;
-
-def test(x)
-  printd(x) :
-  x = 4 :
-  printd(x);
-
-test(123);
-</pre>
-</div>
-
-<p>When run, this example prints "123" and then "4", showing that we did
-actually mutate the value!  Okay, we have now officially implemented our goal:
-getting this to work requires SSA construction in the general case.  However,
-to be really useful, we want the ability to define our own local variables, lets
-add this next!
-</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="localvars">User-defined Local Variables</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Adding var/in is just like any other other extensions we made to
-Kaleidoscope: we extend the lexer, the parser, the AST and the code generator.
-The first step for adding our new 'var/in' construct is to extend the lexer.
-As before, this is pretty trivial, the code looks like this:</p>
-
-<div class="doc_code">
-<pre>
-type token =
-  ...
-  <b>(* var definition *)
-  | Var</b>
-
-...
-
-and lex_ident buffer = parser
-      ...
-      | "in" -&gt; [&lt; 'Token.In; stream &gt;]
-      | "binary" -&gt; [&lt; 'Token.Binary; stream &gt;]
-      | "unary" -&gt; [&lt; 'Token.Unary; stream &gt;]
-      <b>| "var" -&gt; [&lt; 'Token.Var; stream &gt;]</b>
-      ...
-</pre>
-</div>
-
-<p>The next step is to define the AST node that we will construct.  For var/in,
-it looks like this:</p>
-
-<div class="doc_code">
-<pre>
-type expr =
-  ...
-  (* variant for var/in. *)
-  | Var of (string * expr option) array * expr
-  ...
-</pre>
-</div>
-
-<p>var/in allows a list of names to be defined all at once, and each name can
-optionally have an initializer value.  As such, we capture this information in
-the VarNames vector.  Also, var/in has a body, this body is allowed to access
-the variables defined by the var/in.</p>
-
-<p>With this in place, we can define the parser pieces.  The first thing we do
-is add it as a primary expression:</p>
-
-<div class="doc_code">
-<pre>
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr
- *   ::= ifexpr
- *   ::= forexpr
- <b>*   ::= varexpr</b> *)
-let rec parse_primary = parser
-  ...
-  <b>(* varexpr
-   *   ::= 'var' identifier ('=' expression?
-   *             (',' identifier ('=' expression)?)* 'in' expression *)
-  | [&lt; 'Token.Var;
-       (* At least one variable name is required. *)
-       'Token.Ident id ?? "expected identifier after var";
-       init=parse_var_init;
-       var_names=parse_var_names [(id, init)];
-       (* At this point, we have to have 'in'. *)
-       'Token.In ?? "expected 'in' keyword after 'var'";
-       body=parse_expr &gt;] -&gt;
-      Ast.Var (Array.of_list (List.rev var_names), body)</b>
-
-...
-
-and parse_var_init = parser
-  (* read in the optional initializer. *)
-  | [&lt; 'Token.Kwd '='; e=parse_expr &gt;] -&gt; Some e
-  | [&lt; &gt;] -&gt; None
-
-and parse_var_names accumulator = parser
-  | [&lt; 'Token.Kwd ',';
-       'Token.Ident id ?? "expected identifier list after var";
-       init=parse_var_init;
-       e=parse_var_names ((id, init) :: accumulator) &gt;] -&gt; e
-  | [&lt; &gt;] -&gt; accumulator
-</pre>
-</div>
-
-<p>Now that we can parse and represent the code, we need to support emission of
-LLVM IR for it.  This code starts out with:</p>
-
-<div class="doc_code">
-<pre>
-let rec codegen_expr = function
-  ...
-  | Ast.Var (var_names, body)
-      let old_bindings = ref [] in
-
-      let the_function = block_parent (insertion_block builder) in
-
-      (* Register all variables and emit their initializer. *)
-      Array.iter (fun (var_name, init) -&gt;
-</pre>
-</div>
-
-<p>Basically it loops over all the variables, installing them one at a time.
-For each variable we put into the symbol table, we remember the previous value
-that we replace in OldBindings.</p>
-
-<div class="doc_code">
-<pre>
-        (* Emit the initializer before adding the variable to scope, this
-         * prevents the initializer from referencing the variable itself, and
-         * permits stuff like this:
-         *   var a = 1 in
-         *     var a = a in ...   # refers to outer 'a'. *)
-        let init_val =
-          match init with
-          | Some init -&gt; codegen_expr init
-          (* If not specified, use 0.0. *)
-          | None -&gt; const_float double_type 0.0
-        in
-
-        let alloca = create_entry_block_alloca the_function var_name in
-        ignore(build_store init_val alloca builder);
-
-        (* Remember the old variable binding so that we can restore the binding
-         * when we unrecurse. *)
-
-        begin
-          try
-            let old_value = Hashtbl.find named_values var_name in
-            old_bindings := (var_name, old_value) :: !old_bindings;
-          with Not_found &gt; ()
-        end;
-
-        (* Remember this binding. *)
-        Hashtbl.add named_values var_name alloca;
-      ) var_names;
-</pre>
-</div>
-
-<p>There are more comments here than code.  The basic idea is that we emit the
-initializer, create the alloca, then update the symbol table to point to it.
-Once all the variables are installed in the symbol table, we evaluate the body
-of the var/in expression:</p>
-
-<div class="doc_code">
-<pre>
-      (* Codegen the body, now that all vars are in scope. *)
-      let body_val = codegen_expr body in
-</pre>
-</div>
-
-<p>Finally, before returning, we restore the previous variable bindings:</p>
-
-<div class="doc_code">
-<pre>
-      (* Pop all our variables from scope. *)
-      List.iter (fun (var_name, old_value) -&gt;
-        Hashtbl.add named_values var_name old_value
-      ) !old_bindings;
-
-      (* Return the body computation. *)
-      body_val
-</pre>
-</div>
-
-<p>The end result of all of this is that we get properly scoped variable
-definitions, and we even (trivially) allow mutation of them :).</p>
-
-<p>With this, we completed what we set out to do.  Our nice iterative fib
-example from the intro compiles and runs just fine.  The mem2reg pass optimizes
-all of our stack variables into SSA registers, inserting PHI nodes where needed,
-and our front-end remains simple: no "iterated dominance frontier" computation
-anywhere in sight.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="code">Full Code Listing</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>
-Here is the complete code listing for our running example, enhanced with mutable
-variables and var/in support.  To build this example, use:
-</p>
-
-<div class="doc_code">
-<pre>
-# Compile
-ocamlbuild toy.byte
-# Run
-./toy.byte
-</pre>
-</div>
-
-<p>Here is the code:</p>
-
-<dl>
-<dt>_tags:</dt>
-<dd class="doc_code">
-<pre>
-&lt;{lexer,parser}.ml&gt;: use_camlp4, pp(camlp4of)
-&lt;*.{byte,native}&gt;: g++, use_llvm, use_llvm_analysis
-&lt;*.{byte,native}&gt;: use_llvm_executionengine, use_llvm_target
-&lt;*.{byte,native}&gt;: use_llvm_scalar_opts, use_bindings
-</pre>
-</dd>
-
-<dt>myocamlbuild.ml:</dt>
-<dd class="doc_code">
-<pre>
-open Ocamlbuild_plugin;;
-
-ocaml_lib ~extern:true "llvm";;
-ocaml_lib ~extern:true "llvm_analysis";;
-ocaml_lib ~extern:true "llvm_executionengine";;
-ocaml_lib ~extern:true "llvm_target";;
-ocaml_lib ~extern:true "llvm_scalar_opts";;
-
-flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
-dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
-</pre>
-</dd>
-
-<dt>token.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer Tokens
- *===----------------------------------------------------------------------===*)
-
-(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
- * these others for known things. *)
-type token =
-  (* commands *)
-  | Def | Extern
-
-  (* primary *)
-  | Ident of string | Number of float
-
-  (* unknown *)
-  | Kwd of char
-
-  (* control *)
-  | If | Then | Else
-  | For | In
-
-  (* operators *)
-  | Binary | Unary
-
-  (* var definition *)
-  | Var
-</pre>
-</dd>
-
-<dt>lexer.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Lexer
- *===----------------------------------------------------------------------===*)
-
-let rec lex = parser
-  (* Skip any whitespace. *)
-  | [&lt; ' (' ' | '\n' | '\r' | '\t'); stream &gt;] -&gt; lex stream
-
-  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-
-  (* number: [0-9.]+ *)
-  | [&lt; ' ('0' .. '9' as c); stream &gt;] -&gt;
-      let buffer = Buffer.create 1 in
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-
-  (* Comment until end of line. *)
-  | [&lt; ' ('#'); stream &gt;] -&gt;
-      lex_comment stream
-
-  (* Otherwise, just return the character as its ascii value. *)
-  | [&lt; 'c; stream &gt;] -&gt;
-      [&lt; 'Token.Kwd c; lex stream &gt;]
-
-  (* end of stream. *)
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-
-and lex_number buffer = parser
-  | [&lt; ' ('0' .. '9' | '.' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_number buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      [&lt; 'Token.Number (float_of_string (Buffer.contents buffer)); stream &gt;]
-
-and lex_ident buffer = parser
-  | [&lt; ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream &gt;] -&gt;
-      Buffer.add_char buffer c;
-      lex_ident buffer stream
-  | [&lt; stream=lex &gt;] -&gt;
-      match Buffer.contents buffer with
-      | "def" -&gt; [&lt; 'Token.Def; stream &gt;]
-      | "extern" -&gt; [&lt; 'Token.Extern; stream &gt;]
-      | "if" -&gt; [&lt; 'Token.If; stream &gt;]
-      | "then" -&gt; [&lt; 'Token.Then; stream &gt;]
-      | "else" -&gt; [&lt; 'Token.Else; stream &gt;]
-      | "for" -&gt; [&lt; 'Token.For; stream &gt;]
-      | "in" -&gt; [&lt; 'Token.In; stream &gt;]
-      | "binary" -&gt; [&lt; 'Token.Binary; stream &gt;]
-      | "unary" -&gt; [&lt; 'Token.Unary; stream &gt;]
-      | "var" -&gt; [&lt; 'Token.Var; stream &gt;]
-      | id -&gt; [&lt; 'Token.Ident id; stream &gt;]
-
-and lex_comment = parser
-  | [&lt; ' ('\n'); stream=lex &gt;] -&gt; stream
-  | [&lt; 'c; e=lex_comment &gt;] -&gt; e
-  | [&lt; &gt;] -&gt; [&lt; &gt;]
-</pre>
-</dd>
-
-<dt>ast.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Abstract Syntax Tree (aka Parse Tree)
- *===----------------------------------------------------------------------===*)
-
-(* expr - Base type for all expression nodes. *)
-type expr =
-  (* variant for numeric literals like "1.0". *)
-  | Number of float
-
-  (* variant for referencing a variable, like "a". *)
-  | Variable of string
-
-  (* variant for a unary operator. *)
-  | Unary of char * expr
-
-  (* variant for a binary operator. *)
-  | Binary of char * expr * expr
-
-  (* variant for function calls. *)
-  | Call of string * expr array
-
-  (* variant for if/then/else. *)
-  | If of expr * expr * expr
-
-  (* variant for for/in. *)
-  | For of string * expr * expr * expr option * expr
-
-  (* variant for var/in. *)
-  | Var of (string * expr option) array * expr
-
-(* proto - This type represents the "prototype" for a function, which captures
- * its name, and its argument names (thus implicitly the number of arguments the
- * function takes). *)
-type proto =
-  | Prototype of string * string array
-  | BinOpPrototype of string * string array * int
-
-(* func - This type represents a function definition itself. *)
-type func = Function of proto * expr
-</pre>
-</dd>
-
-<dt>parser.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===---------------------------------------------------------------------===
- * Parser
- *===---------------------------------------------------------------------===*)
-
-(* binop_precedence - This holds the precedence for each binary operator that is
- * defined *)
-let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
-
-(* precedence - Get the precedence of the pending binary operator token. *)
-let precedence c = try Hashtbl.find binop_precedence c with Not_found -&gt; -1
-
-(* primary
- *   ::= identifier
- *   ::= numberexpr
- *   ::= parenexpr
- *   ::= ifexpr
- *   ::= forexpr
- *   ::= varexpr *)
-let rec parse_primary = parser
-  (* numberexpr ::= number *)
-  | [&lt; 'Token.Number n &gt;] -&gt; Ast.Number n
-
-  (* parenexpr ::= '(' expression ')' *)
-  | [&lt; 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" &gt;] -&gt; e
-
-  (* identifierexpr
-   *   ::= identifier
-   *   ::= identifier '(' argumentexpr ')' *)
-  | [&lt; 'Token.Ident id; stream &gt;] -&gt;
-      let rec parse_args accumulator = parser
-        | [&lt; e=parse_expr; stream &gt;] -&gt;
-            begin parser
-              | [&lt; 'Token.Kwd ','; e=parse_args (e :: accumulator) &gt;] -&gt; e
-              | [&lt; &gt;] -&gt; e :: accumulator
-            end stream
-        | [&lt; &gt;] -&gt; accumulator
-      in
-      let rec parse_ident id = parser
-        (* Call. *)
-        | [&lt; 'Token.Kwd '(';
-             args=parse_args [];
-             'Token.Kwd ')' ?? "expected ')'"&gt;] -&gt;
-            Ast.Call (id, Array.of_list (List.rev args))
-
-        (* Simple variable ref. *)
-        | [&lt; &gt;] -&gt; Ast.Variable id
-      in
-      parse_ident id stream
-
-  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
-  | [&lt; 'Token.If; c=parse_expr;
-       'Token.Then ?? "expected 'then'"; t=parse_expr;
-       'Token.Else ?? "expected 'else'"; e=parse_expr &gt;] -&gt;
-      Ast.If (c, t, e)
-
-  (* forexpr
-        ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
-  | [&lt; 'Token.For;
-       'Token.Ident id ?? "expected identifier after for";
-       'Token.Kwd '=' ?? "expected '=' after for";
-       stream &gt;] -&gt;
-      begin parser
-        | [&lt;
-             start=parse_expr;
-             'Token.Kwd ',' ?? "expected ',' after for";
-             end_=parse_expr;
-             stream &gt;] -&gt;
-            let step =
-              begin parser
-              | [&lt; 'Token.Kwd ','; step=parse_expr &gt;] -&gt; Some step
-              | [&lt; &gt;] -&gt; None
-              end stream
-            in
-            begin parser
-            | [&lt; 'Token.In; body=parse_expr &gt;] -&gt;
-                Ast.For (id, start, end_, step, body)
-            | [&lt; &gt;] -&gt;
-                raise (Stream.Error "expected 'in' after for")
-            end stream
-        | [&lt; &gt;] -&gt;
-            raise (Stream.Error "expected '=' after for")
-      end stream
-
-  (* varexpr
-   *   ::= 'var' identifier ('=' expression?
-   *             (',' identifier ('=' expression)?)* 'in' expression *)
-  | [&lt; 'Token.Var;
-       (* At least one variable name is required. *)
-       'Token.Ident id ?? "expected identifier after var";
-       init=parse_var_init;
-       var_names=parse_var_names [(id, init)];
-       (* At this point, we have to have 'in'. *)
-       'Token.In ?? "expected 'in' keyword after 'var'";
-       body=parse_expr &gt;] -&gt;
-      Ast.Var (Array.of_list (List.rev var_names), body)
-
-  | [&lt; &gt;] -&gt; raise (Stream.Error "unknown token when expecting an expression.")
-
-(* unary
- *   ::= primary
- *   ::= '!' unary *)
-and parse_unary = parser
-  (* If this is a unary operator, read it. *)
-  | [&lt; 'Token.Kwd op when op != '(' &amp;&amp; op != ')'; operand=parse_expr &gt;] -&gt;
-      Ast.Unary (op, operand)
-
-  (* If the current token is not an operator, it must be a primary expr. *)
-  | [&lt; stream &gt;] -&gt; parse_primary stream
-
-(* binoprhs
- *   ::= ('+' primary)* *)
-and parse_bin_rhs expr_prec lhs stream =
-  match Stream.peek stream with
-  (* If this is a binop, find its precedence. *)
-  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -&gt;
-      let token_prec = precedence c in
-
-      (* If this is a binop that binds at least as tightly as the current binop,
-       * consume it, otherwise we are done. *)
-      if token_prec &lt; expr_prec then lhs else begin
-        (* Eat the binop. *)
-        Stream.junk stream;
-
-        (* Parse the primary expression after the binary operator. *)
-        let rhs = parse_unary stream in
-
-        (* Okay, we know this is a binop. *)
-        let rhs =
-          match Stream.peek stream with
-          | Some (Token.Kwd c2) -&gt;
-              (* If BinOp binds less tightly with rhs than the operator after
-               * rhs, let the pending operator take rhs as its lhs. *)
-              let next_prec = precedence c2 in
-              if token_prec &lt; next_prec
-              then parse_bin_rhs (token_prec + 1) rhs stream
-              else rhs
-          | _ -&gt; rhs
-        in
-
-        (* Merge lhs/rhs. *)
-        let lhs = Ast.Binary (c, lhs, rhs) in
-        parse_bin_rhs expr_prec lhs stream
-      end
-  | _ -&gt; lhs
-
-and parse_var_init = parser
-  (* read in the optional initializer. *)
-  | [&lt; 'Token.Kwd '='; e=parse_expr &gt;] -&gt; Some e
-  | [&lt; &gt;] -&gt; None
-
-and parse_var_names accumulator = parser
-  | [&lt; 'Token.Kwd ',';
-       'Token.Ident id ?? "expected identifier list after var";
-       init=parse_var_init;
-       e=parse_var_names ((id, init) :: accumulator) &gt;] -&gt; e
-  | [&lt; &gt;] -&gt; accumulator
-
-(* expression
- *   ::= primary binoprhs *)
-and parse_expr = parser
-  | [&lt; lhs=parse_unary; stream &gt;] -&gt; parse_bin_rhs 0 lhs stream
-
-(* prototype
- *   ::= id '(' id* ')'
- *   ::= binary LETTER number? (id, id)
- *   ::= unary LETTER number? (id) *)
-let parse_prototype =
-  let rec parse_args accumulator = parser
-    | [&lt; 'Token.Ident id; e=parse_args (id::accumulator) &gt;] -&gt; e
-    | [&lt; &gt;] -&gt; accumulator
-  in
-  let parse_operator = parser
-    | [&lt; 'Token.Unary &gt;] -&gt; "unary", 1
-    | [&lt; 'Token.Binary &gt;] -&gt; "binary", 2
-  in
-  let parse_binary_precedence = parser
-    | [&lt; 'Token.Number n &gt;] -&gt; int_of_float n
-    | [&lt; &gt;] -&gt; 30
-  in
-  parser
-  | [&lt; 'Token.Ident id;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-       args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      (* success. *)
-      Ast.Prototype (id, Array.of_list (List.rev args))
-  | [&lt; (prefix, kind)=parse_operator;
-       'Token.Kwd op ?? "expected an operator";
-       (* Read the precedence if present. *)
-       binary_precedence=parse_binary_precedence;
-       'Token.Kwd '(' ?? "expected '(' in prototype";
-        args=parse_args [];
-       'Token.Kwd ')' ?? "expected ')' in prototype" &gt;] -&gt;
-      let name = prefix ^ (String.make 1 op) in
-      let args = Array.of_list (List.rev args) in
-
-      (* Verify right number of arguments for operator. *)
-      if Array.length args != kind
-      then raise (Stream.Error "invalid number of operands for operator")
-      else
-        if kind == 1 then
-          Ast.Prototype (name, args)
-        else
-          Ast.BinOpPrototype (name, args, binary_precedence)
-  | [&lt; &gt;] -&gt;
-      raise (Stream.Error "expected function name in prototype")
-
-(* definition ::= 'def' prototype expression *)
-let parse_definition = parser
-  | [&lt; 'Token.Def; p=parse_prototype; e=parse_expr &gt;] -&gt;
-      Ast.Function (p, e)
-
-(* toplevelexpr ::= expression *)
-let parse_toplevel = parser
-  | [&lt; e=parse_expr &gt;] -&gt;
-      (* Make an anonymous proto. *)
-      Ast.Function (Ast.Prototype ("", [||]), e)
-
-(*  external ::= 'extern' prototype *)
-let parse_extern = parser
-  | [&lt; 'Token.Extern; e=parse_prototype &gt;] -&gt; e
-</pre>
-</dd>
-
-<dt>codegen.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Code Generation
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-
-exception Error of string
-
-let context = global_context ()
-let the_module = create_module context "my cool jit"
-let builder = builder context
-let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
-let double_type = double_type context
-
-(* Create an alloca instruction in the entry block of the function. This
- * is used for mutable variables etc. *)
-let create_entry_block_alloca the_function var_name =
-  let builder = builder_at context (instr_begin (entry_block the_function)) in
-  build_alloca double_type var_name builder
-
-let rec codegen_expr = function
-  | Ast.Number n -&gt; const_float double_type n
-  | Ast.Variable name -&gt;
-      let v = try Hashtbl.find named_values name with
-        | Not_found -&gt; raise (Error "unknown variable name")
-      in
-      (* Load the value. *)
-      build_load v name builder
-  | Ast.Unary (op, operand) -&gt;
-      let operand = codegen_expr operand in
-      let callee = "unary" ^ (String.make 1 op) in
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -&gt; callee
-        | None -&gt; raise (Error "unknown unary operator")
-      in
-      build_call callee [|operand|] "unop" builder
-  | Ast.Binary (op, lhs, rhs) -&gt;
-      begin match op with
-      | '=' -&gt;
-          (* Special case '=' because we don't want to emit the LHS as an
-           * expression. *)
-          let name =
-            match lhs with
-            | Ast.Variable name -&gt; name
-            | _ -&gt; raise (Error "destination of '=' must be a variable")
-          in
-
-          (* Codegen the rhs. *)
-          let val_ = codegen_expr rhs in
-
-          (* Lookup the name. *)
-          let variable = try Hashtbl.find named_values name with
-          | Not_found -&gt; raise (Error "unknown variable name")
-          in
-          ignore(build_store val_ variable builder);
-          val_
-      | _ -&gt;
-          let lhs_val = codegen_expr lhs in
-          let rhs_val = codegen_expr rhs in
-          begin
-            match op with
-            | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
-            | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
-            | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
-            | '&lt;' -&gt;
-                (* Convert bool 0/1 to double 0.0 or 1.0 *)
-                let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
-                build_uitofp i double_type "booltmp" builder
-            | _ -&gt;
-                (* If it wasn't a builtin binary operator, it must be a user defined
-                 * one. Emit a call to it. *)
-                let callee = "binary" ^ (String.make 1 op) in
-                let callee =
-                  match lookup_function callee the_module with
-                  | Some callee -&gt; callee
-                  | None -&gt; raise (Error "binary operator not found!")
-                in
-                build_call callee [|lhs_val; rhs_val|] "binop" builder
-          end
-      end
-  | Ast.Call (callee, args) -&gt;
-      (* Look up the name in the module table. *)
-      let callee =
-        match lookup_function callee the_module with
-        | Some callee -&gt; callee
-        | None -&gt; raise (Error "unknown function referenced")
-      in
-      let params = params callee in
-
-      (* If argument mismatch error. *)
-      if Array.length params == Array.length args then () else
-        raise (Error "incorrect # arguments passed");
-      let args = Array.map codegen_expr args in
-      build_call callee args "calltmp" builder
-  | Ast.If (cond, then_, else_) -&gt;
-      let cond = codegen_expr cond in
-
-      (* Convert condition to a bool by comparing equal to 0.0 *)
-      let zero = const_float double_type 0.0 in
-      let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
-
-      (* Grab the first block so that we might later add the conditional branch
-       * to it at the end of the function. *)
-      let start_bb = insertion_block builder in
-      let the_function = block_parent start_bb in
-
-      let then_bb = append_block context "then" the_function in
-
-      (* Emit 'then' value. *)
-      position_at_end then_bb builder;
-      let then_val = codegen_expr then_ in
-
-      (* Codegen of 'then' can change the current block, update then_bb for the
-       * phi. We create a new name because one is used for the phi node, and the
-       * other is used for the conditional branch. *)
-      let new_then_bb = insertion_block builder in
-
-      (* Emit 'else' value. *)
-      let else_bb = append_block context "else" the_function in
-      position_at_end else_bb builder;
-      let else_val = codegen_expr else_ in
-
-      (* Codegen of 'else' can change the current block, update else_bb for the
-       * phi. *)
-      let new_else_bb = insertion_block builder in
-
-      (* Emit merge block. *)
-      let merge_bb = append_block context "ifcont" the_function in
-      position_at_end merge_bb builder;
-      let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
-      let phi = build_phi incoming "iftmp" builder in
-
-      (* Return to the start block to add the conditional branch. *)
-      position_at_end start_bb builder;
-      ignore (build_cond_br cond_val then_bb else_bb builder);
-
-      (* Set a unconditional branch at the end of the 'then' block and the
-       * 'else' block to the 'merge' block. *)
-      position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
-      position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
-
-      (* Finally, set the builder to the end of the merge block. *)
-      position_at_end merge_bb builder;
-
-      phi
-  | Ast.For (var_name, start, end_, step, body) -&gt;
-      (* Output this as:
-       *   var = alloca double
-       *   ...
-       *   start = startexpr
-       *   store start -&gt; var
-       *   goto loop
-       * loop:
-       *   ...
-       *   bodyexpr
-       *   ...
-       * loopend:
-       *   step = stepexpr
-       *   endcond = endexpr
-       *
-       *   curvar = load var
-       *   nextvar = curvar + step
-       *   store nextvar -&gt; var
-       *   br endcond, loop, endloop
-       * outloop: *)
-
-      let the_function = block_parent (insertion_block builder) in
-
-      (* Create an alloca for the variable in the entry block. *)
-      let alloca = create_entry_block_alloca the_function var_name in
-
-      (* Emit the start code first, without 'variable' in scope. *)
-      let start_val = codegen_expr start in
-
-      (* Store the value into the alloca. *)
-      ignore(build_store start_val alloca builder);
-
-      (* Make the new basic block for the loop header, inserting after current
-       * block. *)
-      let loop_bb = append_block context "loop" the_function in
-
-      (* Insert an explicit fall through from the current block to the
-       * loop_bb. *)
-      ignore (build_br loop_bb builder);
-
-      (* Start insertion in loop_bb. *)
-      position_at_end loop_bb builder;
-
-      (* Within the loop, the variable is defined equal to the PHI node. If it
-       * shadows an existing variable, we have to restore it, so save it
-       * now. *)
-      let old_val =
-        try Some (Hashtbl.find named_values var_name) with Not_found -&gt; None
-      in
-      Hashtbl.add named_values var_name alloca;
-
-      (* Emit the body of the loop.  This, like any other expr, can change the
-       * current BB.  Note that we ignore the value computed by the body, but
-       * don't allow an error *)
-      ignore (codegen_expr body);
-
-      (* Emit the step value. *)
-      let step_val =
-        match step with
-        | Some step -&gt; codegen_expr step
-        (* If not specified, use 1.0. *)
-        | None -&gt; const_float double_type 1.0
-      in
-
-      (* Compute the end condition. *)
-      let end_cond = codegen_expr end_ in
-
-      (* Reload, increment, and restore the alloca. This handles the case where
-       * the body of the loop mutates the variable. *)
-      let cur_var = build_load alloca var_name builder in
-      let next_var = build_add cur_var step_val "nextvar" builder in
-      ignore(build_store next_var alloca builder);
-
-      (* Convert condition to a bool by comparing equal to 0.0. *)
-      let zero = const_float double_type 0.0 in
-      let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
-
-      (* Create the "after loop" block and insert it. *)
-      let after_bb = append_block context "afterloop" the_function in
-
-      (* Insert the conditional branch into the end of loop_end_bb. *)
-      ignore (build_cond_br end_cond loop_bb after_bb builder);
-
-      (* Any new code will be inserted in after_bb. *)
-      position_at_end after_bb builder;
-
-      (* Restore the unshadowed variable. *)
-      begin match old_val with
-      | Some old_val -&gt; Hashtbl.add named_values var_name old_val
-      | None -&gt; ()
-      end;
-
-      (* for expr always returns 0.0. *)
-      const_null double_type
-  | Ast.Var (var_names, body) -&gt;
-      let old_bindings = ref [] in
-
-      let the_function = block_parent (insertion_block builder) in
-
-      (* Register all variables and emit their initializer. *)
-      Array.iter (fun (var_name, init) -&gt;
-        (* Emit the initializer before adding the variable to scope, this
-         * prevents the initializer from referencing the variable itself, and
-         * permits stuff like this:
-         *   var a = 1 in
-         *     var a = a in ...   # refers to outer 'a'. *)
-        let init_val =
-          match init with
-          | Some init -&gt; codegen_expr init
-          (* If not specified, use 0.0. *)
-          | None -&gt; const_float double_type 0.0
-        in
-
-        let alloca = create_entry_block_alloca the_function var_name in
-        ignore(build_store init_val alloca builder);
-
-        (* Remember the old variable binding so that we can restore the binding
-         * when we unrecurse. *)
-        begin
-          try
-            let old_value = Hashtbl.find named_values var_name in
-            old_bindings := (var_name, old_value) :: !old_bindings;
-          with Not_found -&gt; ()
-        end;
-
-        (* Remember this binding. *)
-        Hashtbl.add named_values var_name alloca;
-      ) var_names;
-
-      (* Codegen the body, now that all vars are in scope. *)
-      let body_val = codegen_expr body in
-
-      (* Pop all our variables from scope. *)
-      List.iter (fun (var_name, old_value) -&gt;
-        Hashtbl.add named_values var_name old_value
-      ) !old_bindings;
-
-      (* Return the body computation. *)
-      body_val
-
-let codegen_proto = function
-  | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -&gt;
-      (* Make the function type: double(double,double) etc. *)
-      let doubles = Array.make (Array.length args) double_type in
-      let ft = function_type double_type doubles in
-      let f =
-        match lookup_function name the_module with
-        | None -&gt; declare_function name ft the_module
-
-        (* If 'f' conflicted, there was already something named 'name'. If it
-         * has a body, don't allow redefinition or reextern. *)
-        | Some f -&gt;
-            (* If 'f' already has a body, reject this. *)
-            if block_begin f &lt;&gt; At_end f then
-              raise (Error "redefinition of function");
-
-            (* If 'f' took a different number of arguments, reject. *)
-            if element_type (type_of f) &lt;&gt; ft then
-              raise (Error "redefinition of function with different # args");
-            f
-      in
-
-      (* Set names for all arguments. *)
-      Array.iteri (fun i a -&gt;
-        let n = args.(i) in
-        set_value_name n a;
-        Hashtbl.add named_values n a;
-      ) (params f);
-      f
-
-(* Create an alloca for each argument and register the argument in the symbol
- * table so that references to it will succeed. *)
-let create_argument_allocas the_function proto =
-  let args = match proto with
-    | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -&gt; args
-  in
-  Array.iteri (fun i ai -&gt;
-    let var_name = args.(i) in
-    (* Create an alloca for this variable. *)
-    let alloca = create_entry_block_alloca the_function var_name in
-
-    (* Store the initial value into the alloca. *)
-    ignore(build_store ai alloca builder);
-
-    (* Add arguments to variable symbol table. *)
-    Hashtbl.add named_values var_name alloca;
-  ) (params the_function)
-
-let codegen_func the_fpm = function
-  | Ast.Function (proto, body) -&gt;
-      Hashtbl.clear named_values;
-      let the_function = codegen_proto proto in
-
-      (* If this is an operator, install it. *)
-      begin match proto with
-      | Ast.BinOpPrototype (name, args, prec) -&gt;
-          let op = name.[String.length name - 1] in
-          Hashtbl.add Parser.binop_precedence op prec;
-      | _ -&gt; ()
-      end;
-
-      (* Create a new basic block to start insertion into. *)
-      let bb = append_block context "entry" the_function in
-      position_at_end bb builder;
-
-      try
-        (* Add all arguments to the symbol table and create their allocas. *)
-        create_argument_allocas the_function proto;
-
-        let ret_val = codegen_expr body in
-
-        (* Finish off the function. *)
-        let _ = build_ret ret_val builder in
-
-        (* Validate the generated code, checking for consistency. *)
-        Llvm_analysis.assert_valid_function the_function;
-
-        (* Optimize the function. *)
-        let _ = PassManager.run_function the_function the_fpm in
-
-        the_function
-      with e -&gt;
-        delete_function the_function;
-        raise e
-</pre>
-</dd>
-
-<dt>toplevel.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Top-Level parsing and JIT Driver
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-
-(* top ::= definition | external | expression | ';' *)
-let rec main_loop the_fpm the_execution_engine stream =
-  match Stream.peek stream with
-  | None -&gt; ()
-
-  (* ignore top-level semicolons. *)
-  | Some (Token.Kwd ';') -&gt;
-      Stream.junk stream;
-      main_loop the_fpm the_execution_engine stream
-
-  | Some token -&gt;
-      begin
-        try match token with
-        | Token.Def -&gt;
-            let e = Parser.parse_definition stream in
-            print_endline "parsed a function definition.";
-            dump_value (Codegen.codegen_func the_fpm e);
-        | Token.Extern -&gt;
-            let e = Parser.parse_extern stream in
-            print_endline "parsed an extern.";
-            dump_value (Codegen.codegen_proto e);
-        | _ -&gt;
-            (* Evaluate a top-level expression into an anonymous function. *)
-            let e = Parser.parse_toplevel stream in
-            print_endline "parsed a top-level expr";
-            let the_function = Codegen.codegen_func the_fpm e in
-            dump_value the_function;
-
-            (* JIT the function, returning a function pointer. *)
-            let result = ExecutionEngine.run_function the_function [||]
-              the_execution_engine in
-
-            print_string "Evaluated to ";
-            print_float (GenericValue.as_float Codegen.double_type result);
-            print_newline ();
-        with Stream.Error s | Codegen.Error s -&gt;
-          (* Skip token for error recovery. *)
-          Stream.junk stream;
-          print_endline s;
-      end;
-      print_string "ready&gt; "; flush stdout;
-      main_loop the_fpm the_execution_engine stream
-</pre>
-</dd>
-
-<dt>toy.ml:</dt>
-<dd class="doc_code">
-<pre>
-(*===----------------------------------------------------------------------===
- * Main driver code.
- *===----------------------------------------------------------------------===*)
-
-open Llvm
-open Llvm_executionengine
-open Llvm_target
-open Llvm_scalar_opts
-
-let main () =
-  ignore (initialize_native_target ());
-
-  (* Install standard binary operators.
-   * 1 is the lowest precedence. *)
-  Hashtbl.add Parser.binop_precedence '=' 2;
-  Hashtbl.add Parser.binop_precedence '&lt;' 10;
-  Hashtbl.add Parser.binop_precedence '+' 20;
-  Hashtbl.add Parser.binop_precedence '-' 20;
-  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
-
-  (* Prime the first token. *)
-  print_string "ready&gt; "; flush stdout;
-  let stream = Lexer.lex (Stream.of_channel stdin) in
-
-  (* Create the JIT. *)
-  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
-  let the_fpm = PassManager.create_function Codegen.the_module in
-
-  (* Set up the optimizer pipeline.  Start with registering info about how the
-   * target lays out data structures. *)
-  DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
-
-  (* Promote allocas to registers. *)
-  add_memory_to_register_promotion the_fpm;
-
-  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
-  add_instruction_combination the_fpm;
-
-  (* reassociate expressions. *)
-  add_reassociation the_fpm;
-
-  (* Eliminate Common SubExpressions. *)
-  add_gvn the_fpm;
-
-  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
-  add_cfg_simplification the_fpm;
-
-  ignore (PassManager.initialize the_fpm);
-
-  (* Run the main "interpreter loop" now. *)
-  Toplevel.main_loop the_fpm the_execution_engine stream;
-
-  (* Print out all the generated code. *)
-  dump_module Codegen.the_module
-;;
-
-main ()
-</pre>
-</dd>
-
-<dt>bindings.c</dt>
-<dd class="doc_code">
-<pre>
-#include &lt;stdio.h&gt;
-
-/* putchard - putchar that takes a double and returns 0. */
-extern double putchard(double X) {
-  putchar((char)X);
-  return 0;
-}
-
-/* printd - printf that takes a double prints it as "%f\n", returning 0. */
-extern double printd(double X) {
-  printf("%f\n", X);
-  return 0;
-}
-</pre>
-</dd>
-</dl>
-
-<a href="OCamlLangImpl8.html">Next: Conclusion and other useful LLVM tidbits</a>
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  <a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl7.rst b/docs/tutorial/OCamlLangImpl7.rst
new file mode 100644
index 0000000000..07da3a8ff9
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl7.rst
@@ -0,0 +1,1726 @@
+=======================================================
+Kaleidoscope: Extending the Language: Mutable Variables
+=======================================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_ and `Erick
+Tryzelaar <mailto:idadesub@users.sourceforge.net>`_
+
+Chapter 7 Introduction
+======================
+
+Welcome to Chapter 7 of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. In chapters 1 through 6, we've built a
+very respectable, albeit simple, `functional programming
+language <http://en.wikipedia.org/wiki/Functional_programming>`_. In our
+journey, we learned some parsing techniques, how to build and represent
+an AST, how to build LLVM IR, and how to optimize the resultant code as
+well as JIT compile it.
+
+While Kaleidoscope is interesting as a functional language, the fact
+that it is functional makes it "too easy" to generate LLVM IR for it. In
+particular, a functional language makes it very easy to build LLVM IR
+directly in `SSA
+form <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_.
+Since LLVM requires that the input code be in SSA form, this is a very
+nice property and it is often unclear to newcomers how to generate code
+for an imperative language with mutable variables.
+
+The short (and happy) summary of this chapter is that there is no need
+for your front-end to build SSA form: LLVM provides highly tuned and
+well tested support for this, though the way it works is a bit
+unexpected for some.
+
+Why is this a hard problem?
+===========================
+
+To understand why mutable variables cause complexities in SSA
+construction, consider this extremely simple C example:
+
+.. code-block:: c
+
+    int G, H;
+    int test(_Bool Condition) {
+      int X;
+      if (Condition)
+        X = G;
+      else
+        X = H;
+      return X;
+    }
+
+In this case, we have the variable "X", whose value depends on the path
+executed in the program. Because there are two different possible values
+for X before the return instruction, a PHI node is inserted to merge the
+two values. The LLVM IR that we want for this example looks like this:
+
+.. code-block:: llvm
+
+    @G = weak global i32 0   ; type of @G is i32*
+    @H = weak global i32 0   ; type of @H is i32*
+
+    define i32 @test(i1 %Condition) {
+    entry:
+      br i1 %Condition, label %cond_true, label %cond_false
+
+    cond_true:
+      %X.0 = load i32* @G
+      br label %cond_next
+
+    cond_false:
+      %X.1 = load i32* @H
+      br label %cond_next
+
+    cond_next:
+      %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+      ret i32 %X.2
+    }
+
+In this example, the loads from the G and H global variables are
+explicit in the LLVM IR, and they live in the then/else branches of the
+if statement (cond\_true/cond\_false). In order to merge the incoming
+values, the X.2 phi node in the cond\_next block selects the right value
+to use based on where control flow is coming from: if control flow comes
+from the cond\_false block, X.2 gets the value of X.1. Alternatively, if
+control flow comes from cond\_true, it gets the value of X.0. The intent
+of this chapter is not to explain the details of SSA form. For more
+information, see one of the many `online
+references <http://en.wikipedia.org/wiki/Static_single_assignment_form>`_.
+
+The question for this article is "who places the phi nodes when lowering
+assignments to mutable variables?". The issue here is that LLVM
+*requires* that its IR be in SSA form: there is no "non-ssa" mode for
+it. However, SSA construction requires non-trivial algorithms and data
+structures, so it is inconvenient and wasteful for every front-end to
+have to reproduce this logic.
+
+Memory in LLVM
+==============
+
+The 'trick' here is that while LLVM does require all register values to
+be in SSA form, it does not require (or permit) memory objects to be in
+SSA form. In the example above, note that the loads from G and H are
+direct accesses to G and H: they are not renamed or versioned. This
+differs from some other compiler systems, which do try to version memory
+objects. In LLVM, instead of encoding dataflow analysis of memory into
+the LLVM IR, it is handled with `Analysis
+Passes <../WritingAnLLVMPass.html>`_ which are computed on demand.
+
+With this in mind, the high-level idea is that we want to make a stack
+variable (which lives in memory, because it is on the stack) for each
+mutable object in a function. To take advantage of this trick, we need
+to talk about how LLVM represents stack variables.
+
+In LLVM, all memory accesses are explicit with load/store instructions,
+and it is carefully designed not to have (or need) an "address-of"
+operator. Notice how the type of the @G/@H global variables is actually
+"i32\*" even though the variable is defined as "i32". What this means is
+that @G defines *space* for an i32 in the global data area, but its
+*name* actually refers to the address for that space. Stack variables
+work the same way, except that instead of being declared with global
+variable definitions, they are declared with the `LLVM alloca
+instruction <../LangRef.html#i_alloca>`_:
+
+.. code-block:: llvm
+
+    define i32 @example() {
+    entry:
+      %X = alloca i32           ; type of %X is i32*.
+      ...
+      %tmp = load i32* %X       ; load the stack value %X from the stack.
+      %tmp2 = add i32 %tmp, 1   ; increment it
+      store i32 %tmp2, i32* %X  ; store it back
+      ...
+
+This code shows an example of how you can declare and manipulate a stack
+variable in the LLVM IR. Stack memory allocated with the alloca
+instruction is fully general: you can pass the address of the stack slot
+to functions, you can store it in other variables, etc. In our example
+above, we could rewrite the example to use the alloca technique to avoid
+using a PHI node:
+
+.. code-block:: llvm
+
+    @G = weak global i32 0   ; type of @G is i32*
+    @H = weak global i32 0   ; type of @H is i32*
+
+    define i32 @test(i1 %Condition) {
+    entry:
+      %X = alloca i32           ; type of %X is i32*.
+      br i1 %Condition, label %cond_true, label %cond_false
+
+    cond_true:
+      %X.0 = load i32* @G
+            store i32 %X.0, i32* %X   ; Update X
+      br label %cond_next
+
+    cond_false:
+      %X.1 = load i32* @H
+            store i32 %X.1, i32* %X   ; Update X
+      br label %cond_next
+
+    cond_next:
+      %X.2 = load i32* %X       ; Read X
+      ret i32 %X.2
+    }
+
+With this, we have discovered a way to handle arbitrary mutable
+variables without the need to create Phi nodes at all:
+
+#. Each mutable variable becomes a stack allocation.
+#. Each read of the variable becomes a load from the stack.
+#. Each update of the variable becomes a store to the stack.
+#. Taking the address of a variable just uses the stack address
+   directly.
+
+While this solution has solved our immediate problem, it introduced
+another one: we have now apparently introduced a lot of stack traffic
+for very simple and common operations, a major performance problem.
+Fortunately for us, the LLVM optimizer has a highly-tuned optimization
+pass named "mem2reg" that handles this case, promoting allocas like this
+into SSA registers, inserting Phi nodes as appropriate. If you run this
+example through the pass, for example, you'll get:
+
+.. code-block:: bash
+
+    $ llvm-as < example.ll | opt -mem2reg | llvm-dis
+    @G = weak global i32 0
+    @H = weak global i32 0
+
+    define i32 @test(i1 %Condition) {
+    entry:
+      br i1 %Condition, label %cond_true, label %cond_false
+
+    cond_true:
+      %X.0 = load i32* @G
+      br label %cond_next
+
+    cond_false:
+      %X.1 = load i32* @H
+      br label %cond_next
+
+    cond_next:
+      %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+      ret i32 %X.01
+    }
+
+The mem2reg pass implements the standard "iterated dominance frontier"
+algorithm for constructing SSA form and has a number of optimizations
+that speed up (very common) degenerate cases. The mem2reg optimization
+pass is the answer to dealing with mutable variables, and we highly
+recommend that you depend on it. Note that mem2reg only works on
+variables in certain circumstances:
+
+#. mem2reg is alloca-driven: it looks for allocas and if it can handle
+   them, it promotes them. It does not apply to global variables or heap
+   allocations.
+#. mem2reg only looks for alloca instructions in the entry block of the
+   function. Being in the entry block guarantees that the alloca is only
+   executed once, which makes analysis simpler.
+#. mem2reg only promotes allocas whose uses are direct loads and stores.
+   If the address of the stack object is passed to a function, or if any
+   funny pointer arithmetic is involved, the alloca will not be
+   promoted.
+#. mem2reg only works on allocas of `first
+   class <../LangRef.html#t_classifications>`_ values (such as pointers,
+   scalars and vectors), and only if the array size of the allocation is
+   1 (or missing in the .ll file). mem2reg is not capable of promoting
+   structs or arrays to registers. Note that the "scalarrepl" pass is
+   more powerful and can promote structs, "unions", and arrays in many
+   cases.
+
+All of these properties are easy to satisfy for most imperative
+languages, and we'll illustrate it below with Kaleidoscope. The final
+question you may be asking is: should I bother with this nonsense for my
+front-end? Wouldn't it be better if I just did SSA construction
+directly, avoiding use of the mem2reg optimization pass? In short, we
+strongly recommend that you use this technique for building SSA form,
+unless there is an extremely good reason not to. Using this technique
+is:
+
+-  Proven and well tested: llvm-gcc and clang both use this technique
+   for local mutable variables. As such, the most common clients of LLVM
+   are using this to handle a bulk of their variables. You can be sure
+   that bugs are found fast and fixed early.
+-  Extremely Fast: mem2reg has a number of special cases that make it
+   fast in common cases as well as fully general. For example, it has
+   fast-paths for variables that are only used in a single block,
+   variables that only have one assignment point, good heuristics to
+   avoid insertion of unneeded phi nodes, etc.
+-  Needed for debug info generation: `Debug information in
+   LLVM <../SourceLevelDebugging.html>`_ relies on having the address of
+   the variable exposed so that debug info can be attached to it. This
+   technique dovetails very naturally with this style of debug info.
+
+If nothing else, this makes it much easier to get your front-end up and
+running, and is very simple to implement. Lets extend Kaleidoscope with
+mutable variables now!
+
+Mutable Variables in Kaleidoscope
+=================================
+
+Now that we know the sort of problem we want to tackle, lets see what
+this looks like in the context of our little Kaleidoscope language.
+We're going to add two features:
+
+#. The ability to mutate variables with the '=' operator.
+#. The ability to define new variables.
+
+While the first item is really what this is about, we only have
+variables for incoming arguments as well as for induction variables, and
+redefining those only goes so far :). Also, the ability to define new
+variables is a useful thing regardless of whether you will be mutating
+them. Here's a motivating example that shows how we could use these:
+
+::
+
+    # Define ':' for sequencing: as a low-precedence operator that ignores operands
+    # and just returns the RHS.
+    def binary : 1 (x y) y;
+
+    # Recursive fib, we could do this before.
+    def fib(x)
+      if (x < 3) then
+        1
+      else
+        fib(x-1)+fib(x-2);
+
+    # Iterative fib.
+    def fibi(x)
+      var a = 1, b = 1, c in
+      (for i = 3, i < x in
+         c = a + b :
+         a = b :
+         b = c) :
+      b;
+
+    # Call it.
+    fibi(10);
+
+In order to mutate variables, we have to change our existing variables
+to use the "alloca trick". Once we have that, we'll add our new
+operator, then extend Kaleidoscope to support new variable definitions.
+
+Adjusting Existing Variables for Mutation
+=========================================
+
+The symbol table in Kaleidoscope is managed at code generation time by
+the '``named_values``' map. This map currently keeps track of the LLVM
+"Value\*" that holds the double value for the named variable. In order
+to support mutation, we need to change this slightly, so that it
+``named_values`` holds the *memory location* of the variable in
+question. Note that this change is a refactoring: it changes the
+structure of the code, but does not (by itself) change the behavior of
+the compiler. All of these changes are isolated in the Kaleidoscope code
+generator.
+
+At this point in Kaleidoscope's development, it only supports variables
+for two things: incoming arguments to functions and the induction
+variable of 'for' loops. For consistency, we'll allow mutation of these
+variables in addition to other user-defined variables. This means that
+these will both need memory locations.
+
+To start our transformation of Kaleidoscope, we'll change the
+``named_values`` map so that it maps to AllocaInst\* instead of Value\*.
+Once we do this, the C++ compiler will tell us what parts of the code we
+need to update:
+
+**Note:** the ocaml bindings currently model both ``Value*``'s and
+``AllocInst*``'s as ``Llvm.llvalue``'s, but this may change in the future
+to be more type safe.
+
+.. code-block:: ocaml
+
+    let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+
+Also, since we will need to create these alloca's, we'll use a helper
+function that ensures that the allocas are created in the entry block of
+the function:
+
+.. code-block:: ocaml
+
+    (* Create an alloca instruction in the entry block of the function. This
+     * is used for mutable variables etc. *)
+    let create_entry_block_alloca the_function var_name =
+      let builder = builder_at (instr_begin (entry_block the_function)) in
+      build_alloca double_type var_name builder
+
+This funny looking code creates an ``Llvm.llbuilder`` object that is
+pointing at the first instruction of the entry block. It then creates an
+alloca with the expected name and returns it. Because all values in
+Kaleidoscope are doubles, there is no need to pass in a type to use.
+
+With this in place, the first functionality change we want to make is to
+variable references. In our new scheme, variables live on the stack, so
+code generating a reference to them actually needs to produce a load
+from the stack slot:
+
+.. code-block:: ocaml
+
+    let rec codegen_expr = function
+      ...
+      | Ast.Variable name ->
+          let v = try Hashtbl.find named_values name with
+            | Not_found -> raise (Error "unknown variable name")
+          in
+          (* Load the value. *)
+          build_load v name builder
+
+As you can see, this is pretty straightforward. Now we need to update
+the things that define the variables to set up the alloca. We'll start
+with ``codegen_expr Ast.For ...`` (see the `full code listing <#code>`_
+for the unabridged code):
+
+.. code-block:: ocaml
+
+      | Ast.For (var_name, start, end_, step, body) ->
+          let the_function = block_parent (insertion_block builder) in
+
+          (* Create an alloca for the variable in the entry block. *)
+          let alloca = create_entry_block_alloca the_function var_name in
+
+          (* Emit the start code first, without 'variable' in scope. *)
+          let start_val = codegen_expr start in
+
+          (* Store the value into the alloca. *)
+          ignore(build_store start_val alloca builder);
+
+          ...
+
+          (* Within the loop, the variable is defined equal to the PHI node. If it
+           * shadows an existing variable, we have to restore it, so save it
+           * now. *)
+          let old_val =
+            try Some (Hashtbl.find named_values var_name) with Not_found -> None
+          in
+          Hashtbl.add named_values var_name alloca;
+
+          ...
+
+          (* Compute the end condition. *)
+          let end_cond = codegen_expr end_ in
+
+          (* Reload, increment, and restore the alloca. This handles the case where
+           * the body of the loop mutates the variable. *)
+          let cur_var = build_load alloca var_name builder in
+          let next_var = build_add cur_var step_val "nextvar" builder in
+          ignore(build_store next_var alloca builder);
+          ...
+
+This code is virtually identical to the code `before we allowed mutable
+variables <OCamlLangImpl5.html#forcodegen>`_. The big difference is that
+we no longer have to construct a PHI node, and we use load/store to
+access the variable as needed.
+
+To support mutable argument variables, we need to also make allocas for
+them. The code for this is also pretty simple:
+
+.. code-block:: ocaml
+
+    (* Create an alloca for each argument and register the argument in the symbol
+     * table so that references to it will succeed. *)
+    let create_argument_allocas the_function proto =
+      let args = match proto with
+        | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args
+      in
+      Array.iteri (fun i ai ->
+        let var_name = args.(i) in
+        (* Create an alloca for this variable. *)
+        let alloca = create_entry_block_alloca the_function var_name in
+
+        (* Store the initial value into the alloca. *)
+        ignore(build_store ai alloca builder);
+
+        (* Add arguments to variable symbol table. *)
+        Hashtbl.add named_values var_name alloca;
+      ) (params the_function)
+
+For each argument, we make an alloca, store the input value to the
+function into the alloca, and register the alloca as the memory location
+for the argument. This method gets invoked by ``Codegen.codegen_func``
+right after it sets up the entry block for the function.
+
+The final missing piece is adding the mem2reg pass, which allows us to
+get good codegen once again:
+
+.. code-block:: ocaml
+
+    let main () =
+      ...
+      let the_fpm = PassManager.create_function Codegen.the_module in
+
+      (* Set up the optimizer pipeline.  Start with registering info about how the
+       * target lays out data structures. *)
+      DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+      (* Promote allocas to registers. *)
+      add_memory_to_register_promotion the_fpm;
+
+      (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+      add_instruction_combining the_fpm;
+
+      (* reassociate expressions. *)
+      add_reassociation the_fpm;
+
+It is interesting to see what the code looks like before and after the
+mem2reg optimization runs. For example, this is the before/after code
+for our recursive fib function. Before the optimization:
+
+.. code-block:: llvm
+
+    define double @fib(double %x) {
+    entry:
+      %x1 = alloca double
+      store double %x, double* %x1
+      %x2 = load double* %x1
+      %cmptmp = fcmp ult double %x2, 3.000000e+00
+      %booltmp = uitofp i1 %cmptmp to double
+      %ifcond = fcmp one double %booltmp, 0.000000e+00
+      br i1 %ifcond, label %then, label %else
+
+    then:    ; preds = %entry
+      br label %ifcont
+
+    else:    ; preds = %entry
+      %x3 = load double* %x1
+      %subtmp = fsub double %x3, 1.000000e+00
+      %calltmp = call double @fib(double %subtmp)
+      %x4 = load double* %x1
+      %subtmp5 = fsub double %x4, 2.000000e+00
+      %calltmp6 = call double @fib(double %subtmp5)
+      %addtmp = fadd double %calltmp, %calltmp6
+      br label %ifcont
+
+    ifcont:    ; preds = %else, %then
+      %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+      ret double %iftmp
+    }
+
+Here there is only one variable (x, the input argument) but you can
+still see the extremely simple-minded code generation strategy we are
+using. In the entry block, an alloca is created, and the initial input
+value is stored into it. Each reference to the variable does a reload
+from the stack. Also, note that we didn't modify the if/then/else
+expression, so it still inserts a PHI node. While we could make an
+alloca for it, it is actually easier to create a PHI node for it, so we
+still just make the PHI.
+
+Here is the code after the mem2reg pass runs:
+
+.. code-block:: llvm
+
+    define double @fib(double %x) {
+    entry:
+      %cmptmp = fcmp ult double %x, 3.000000e+00
+      %booltmp = uitofp i1 %cmptmp to double
+      %ifcond = fcmp one double %booltmp, 0.000000e+00
+      br i1 %ifcond, label %then, label %else
+
+    then:
+      br label %ifcont
+
+    else:
+      %subtmp = fsub double %x, 1.000000e+00
+      %calltmp = call double @fib(double %subtmp)
+      %subtmp5 = fsub double %x, 2.000000e+00
+      %calltmp6 = call double @fib(double %subtmp5)
+      %addtmp = fadd double %calltmp, %calltmp6
+      br label %ifcont
+
+    ifcont:    ; preds = %else, %then
+      %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+      ret double %iftmp
+    }
+
+This is a trivial case for mem2reg, since there are no redefinitions of
+the variable. The point of showing this is to calm your tension about
+inserting such blatent inefficiencies :).
+
+After the rest of the optimizers run, we get:
+
+.. code-block:: llvm
+
+    define double @fib(double %x) {
+    entry:
+      %cmptmp = fcmp ult double %x, 3.000000e+00
+      %booltmp = uitofp i1 %cmptmp to double
+      %ifcond = fcmp ueq double %booltmp, 0.000000e+00
+      br i1 %ifcond, label %else, label %ifcont
+
+    else:
+      %subtmp = fsub double %x, 1.000000e+00
+      %calltmp = call double @fib(double %subtmp)
+      %subtmp5 = fsub double %x, 2.000000e+00
+      %calltmp6 = call double @fib(double %subtmp5)
+      %addtmp = fadd double %calltmp, %calltmp6
+      ret double %addtmp
+
+    ifcont:
+      ret double 1.000000e+00
+    }
+
+Here we see that the simplifycfg pass decided to clone the return
+instruction into the end of the 'else' block. This allowed it to
+eliminate some branches and the PHI node.
+
+Now that all symbol table references are updated to use stack variables,
+we'll add the assignment operator.
+
+New Assignment Operator
+=======================
+
+With our current framework, adding a new assignment operator is really
+simple. We will parse it just like any other binary operator, but handle
+it internally (instead of allowing the user to define it). The first
+step is to set a precedence:
+
+.. code-block:: ocaml
+
+    let main () =
+      (* Install standard binary operators.
+       * 1 is the lowest precedence. *)
+      Hashtbl.add Parser.binop_precedence '=' 2;
+      Hashtbl.add Parser.binop_precedence '<' 10;
+      Hashtbl.add Parser.binop_precedence '+' 20;
+      Hashtbl.add Parser.binop_precedence '-' 20;
+      ...
+
+Now that the parser knows the precedence of the binary operator, it
+takes care of all the parsing and AST generation. We just need to
+implement codegen for the assignment operator. This looks like:
+
+.. code-block:: ocaml
+
+    let rec codegen_expr = function
+          begin match op with
+          | '=' ->
+              (* Special case '=' because we don't want to emit the LHS as an
+               * expression. *)
+              let name =
+                match lhs with
+                | Ast.Variable name -> name
+                | _ -> raise (Error "destination of '=' must be a variable")
+              in
+
+Unlike the rest of the binary operators, our assignment operator doesn't
+follow the "emit LHS, emit RHS, do computation" model. As such, it is
+handled as a special case before the other binary operators are handled.
+The other strange thing is that it requires the LHS to be a variable. It
+is invalid to have "(x+1) = expr" - only things like "x = expr" are
+allowed.
+
+.. code-block:: ocaml
+
+              (* Codegen the rhs. *)
+              let val_ = codegen_expr rhs in
+
+              (* Lookup the name. *)
+              let variable = try Hashtbl.find named_values name with
+              | Not_found -> raise (Error "unknown variable name")
+              in
+              ignore(build_store val_ variable builder);
+              val_
+          | _ ->
+                ...
+
+Once we have the variable, codegen'ing the assignment is
+straightforward: we emit the RHS of the assignment, create a store, and
+return the computed value. Returning a value allows for chained
+assignments like "X = (Y = Z)".
+
+Now that we have an assignment operator, we can mutate loop variables
+and arguments. For example, we can now run code like this:
+
+::
+
+    # Function to print a double.
+    extern printd(x);
+
+    # Define ':' for sequencing: as a low-precedence operator that ignores operands
+    # and just returns the RHS.
+    def binary : 1 (x y) y;
+
+    def test(x)
+      printd(x) :
+      x = 4 :
+      printd(x);
+
+    test(123);
+
+When run, this example prints "123" and then "4", showing that we did
+actually mutate the value! Okay, we have now officially implemented our
+goal: getting this to work requires SSA construction in the general
+case. However, to be really useful, we want the ability to define our
+own local variables, lets add this next!
+
+User-defined Local Variables
+============================
+
+Adding var/in is just like any other other extensions we made to
+Kaleidoscope: we extend the lexer, the parser, the AST and the code
+generator. The first step for adding our new 'var/in' construct is to
+extend the lexer. As before, this is pretty trivial, the code looks like
+this:
+
+.. code-block:: ocaml
+
+    type token =
+      ...
+      (* var definition *)
+      | Var
+
+    ...
+
+    and lex_ident buffer = parser
+          ...
+          | "in" -> [< 'Token.In; stream >]
+          | "binary" -> [< 'Token.Binary; stream >]
+          | "unary" -> [< 'Token.Unary; stream >]
+          | "var" -> [< 'Token.Var; stream >]
+          ...
+
+The next step is to define the AST node that we will construct. For
+var/in, it looks like this:
+
+.. code-block:: ocaml
+
+    type expr =
+      ...
+      (* variant for var/in. *)
+      | Var of (string * expr option) array * expr
+      ...
+
+var/in allows a list of names to be defined all at once, and each name
+can optionally have an initializer value. As such, we capture this
+information in the VarNames vector. Also, var/in has a body, this body
+is allowed to access the variables defined by the var/in.
+
+With this in place, we can define the parser pieces. The first thing we
+do is add it as a primary expression:
+
+.. code-block:: ocaml
+
+    (* primary
+     *   ::= identifier
+     *   ::= numberexpr
+     *   ::= parenexpr
+     *   ::= ifexpr
+     *   ::= forexpr
+     *   ::= varexpr *)
+    let rec parse_primary = parser
+      ...
+      (* varexpr
+       *   ::= 'var' identifier ('=' expression?
+       *             (',' identifier ('=' expression)?)* 'in' expression *)
+      | [< 'Token.Var;
+           (* At least one variable name is required. *)
+           'Token.Ident id ?? "expected identifier after var";
+           init=parse_var_init;
+           var_names=parse_var_names [(id, init)];
+           (* At this point, we have to have 'in'. *)
+           'Token.In ?? "expected 'in' keyword after 'var'";
+           body=parse_expr >] ->
+          Ast.Var (Array.of_list (List.rev var_names), body)
+
+    ...
+
+    and parse_var_init = parser
+      (* read in the optional initializer. *)
+      | [< 'Token.Kwd '='; e=parse_expr >] -> Some e
+      | [< >] -> None
+
+    and parse_var_names accumulator = parser
+      | [< 'Token.Kwd ',';
+           'Token.Ident id ?? "expected identifier list after var";
+           init=parse_var_init;
+           e=parse_var_names ((id, init) :: accumulator) >] -> e
+      | [< >] -> accumulator
+
+Now that we can parse and represent the code, we need to support
+emission of LLVM IR for it. This code starts out with:
+
+.. code-block:: ocaml
+
+    let rec codegen_expr = function
+      ...
+      | Ast.Var (var_names, body)
+          let old_bindings = ref [] in
+
+          let the_function = block_parent (insertion_block builder) in
+
+          (* Register all variables and emit their initializer. *)
+          Array.iter (fun (var_name, init) ->
+
+Basically it loops over all the variables, installing them one at a
+time. For each variable we put into the symbol table, we remember the
+previous value that we replace in OldBindings.
+
+.. code-block:: ocaml
+
+            (* Emit the initializer before adding the variable to scope, this
+             * prevents the initializer from referencing the variable itself, and
+             * permits stuff like this:
+             *   var a = 1 in
+             *     var a = a in ...   # refers to outer 'a'. *)
+            let init_val =
+              match init with
+              | Some init -> codegen_expr init
+              (* If not specified, use 0.0. *)
+              | None -> const_float double_type 0.0
+            in
+
+            let alloca = create_entry_block_alloca the_function var_name in
+            ignore(build_store init_val alloca builder);
+
+            (* Remember the old variable binding so that we can restore the binding
+             * when we unrecurse. *)
+
+            begin
+              try
+                let old_value = Hashtbl.find named_values var_name in
+                old_bindings := (var_name, old_value) :: !old_bindings;
+              with Not_found > ()
+            end;
+
+            (* Remember this binding. *)
+            Hashtbl.add named_values var_name alloca;
+          ) var_names;
+
+There are more comments here than code. The basic idea is that we emit
+the initializer, create the alloca, then update the symbol table to
+point to it. Once all the variables are installed in the symbol table,
+we evaluate the body of the var/in expression:
+
+.. code-block:: ocaml
+
+          (* Codegen the body, now that all vars are in scope. *)
+          let body_val = codegen_expr body in
+
+Finally, before returning, we restore the previous variable bindings:
+
+.. code-block:: ocaml
+
+          (* Pop all our variables from scope. *)
+          List.iter (fun (var_name, old_value) ->
+            Hashtbl.add named_values var_name old_value
+          ) !old_bindings;
+
+          (* Return the body computation. *)
+          body_val
+
+The end result of all of this is that we get properly scoped variable
+definitions, and we even (trivially) allow mutation of them :).
+
+With this, we completed what we set out to do. Our nice iterative fib
+example from the intro compiles and runs just fine. The mem2reg pass
+optimizes all of our stack variables into SSA registers, inserting PHI
+nodes where needed, and our front-end remains simple: no "iterated
+dominance frontier" computation anywhere in sight.
+
+Full Code Listing
+=================
+
+Here is the complete code listing for our running example, enhanced with
+mutable variables and var/in support. To build this example, use:
+
+.. code-block:: bash
+
+    # Compile
+    ocamlbuild toy.byte
+    # Run
+    ./toy.byte
+
+Here is the code:
+
+\_tags:
+    ::
+
+        <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+        <*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+        <*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+        <*.{byte,native}>: use_llvm_scalar_opts, use_bindings
+
+myocamlbuild.ml:
+    .. code-block:: ocaml
+
+        open Ocamlbuild_plugin;;
+
+        ocaml_lib ~extern:true "llvm";;
+        ocaml_lib ~extern:true "llvm_analysis";;
+        ocaml_lib ~extern:true "llvm_executionengine";;
+        ocaml_lib ~extern:true "llvm_target";;
+        ocaml_lib ~extern:true "llvm_scalar_opts";;
+
+        flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
+        dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
+
+token.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Lexer Tokens
+         *===----------------------------------------------------------------------===*)
+
+        (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
+         * these others for known things. *)
+        type token =
+          (* commands *)
+          | Def | Extern
+
+          (* primary *)
+          | Ident of string | Number of float
+
+          (* unknown *)
+          | Kwd of char
+
+          (* control *)
+          | If | Then | Else
+          | For | In
+
+          (* operators *)
+          | Binary | Unary
+
+          (* var definition *)
+          | Var
+
+lexer.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Lexer
+         *===----------------------------------------------------------------------===*)
+
+        let rec lex = parser
+          (* Skip any whitespace. *)
+          | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+          (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+          | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+              let buffer = Buffer.create 1 in
+              Buffer.add_char buffer c;
+              lex_ident buffer stream
+
+          (* number: [0-9.]+ *)
+          | [< ' ('0' .. '9' as c); stream >] ->
+              let buffer = Buffer.create 1 in
+              Buffer.add_char buffer c;
+              lex_number buffer stream
+
+          (* Comment until end of line. *)
+          | [< ' ('#'); stream >] ->
+              lex_comment stream
+
+          (* Otherwise, just return the character as its ascii value. *)
+          | [< 'c; stream >] ->
+              [< 'Token.Kwd c; lex stream >]
+
+          (* end of stream. *)
+          | [< >] -> [< >]
+
+        and lex_number buffer = parser
+          | [< ' ('0' .. '9' | '.' as c); stream >] ->
+              Buffer.add_char buffer c;
+              lex_number buffer stream
+          | [< stream=lex >] ->
+              [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+        and lex_ident buffer = parser
+          | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+              Buffer.add_char buffer c;
+              lex_ident buffer stream
+          | [< stream=lex >] ->
+              match Buffer.contents buffer with
+              | "def" -> [< 'Token.Def; stream >]
+              | "extern" -> [< 'Token.Extern; stream >]
+              | "if" -> [< 'Token.If; stream >]
+              | "then" -> [< 'Token.Then; stream >]
+              | "else" -> [< 'Token.Else; stream >]
+              | "for" -> [< 'Token.For; stream >]
+              | "in" -> [< 'Token.In; stream >]
+              | "binary" -> [< 'Token.Binary; stream >]
+              | "unary" -> [< 'Token.Unary; stream >]
+              | "var" -> [< 'Token.Var; stream >]
+              | id -> [< 'Token.Ident id; stream >]
+
+        and lex_comment = parser
+          | [< ' ('\n'); stream=lex >] -> stream
+          | [< 'c; e=lex_comment >] -> e
+          | [< >] -> [< >]
+
+ast.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Abstract Syntax Tree (aka Parse Tree)
+         *===----------------------------------------------------------------------===*)
+
+        (* expr - Base type for all expression nodes. *)
+        type expr =
+          (* variant for numeric literals like "1.0". *)
+          | Number of float
+
+          (* variant for referencing a variable, like "a". *)
+          | Variable of string
+
+          (* variant for a unary operator. *)
+          | Unary of char * expr
+
+          (* variant for a binary operator. *)
+          | Binary of char * expr * expr
+
+          (* variant for function calls. *)
+          | Call of string * expr array
+
+          (* variant for if/then/else. *)
+          | If of expr * expr * expr
+
+          (* variant for for/in. *)
+          | For of string * expr * expr * expr option * expr
+
+          (* variant for var/in. *)
+          | Var of (string * expr option) array * expr
+
+        (* proto - This type represents the "prototype" for a function, which captures
+         * its name, and its argument names (thus implicitly the number of arguments the
+         * function takes). *)
+        type proto =
+          | Prototype of string * string array
+          | BinOpPrototype of string * string array * int
+
+        (* func - This type represents a function definition itself. *)
+        type func = Function of proto * expr
+
+parser.ml:
+    .. code-block:: ocaml
+
+        (*===---------------------------------------------------------------------===
+         * Parser
+         *===---------------------------------------------------------------------===*)
+
+        (* binop_precedence - This holds the precedence for each binary operator that is
+         * defined *)
+        let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+        (* precedence - Get the precedence of the pending binary operator token. *)
+        let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+        (* primary
+         *   ::= identifier
+         *   ::= numberexpr
+         *   ::= parenexpr
+         *   ::= ifexpr
+         *   ::= forexpr
+         *   ::= varexpr *)
+        let rec parse_primary = parser
+          (* numberexpr ::= number *)
+          | [< 'Token.Number n >] -> Ast.Number n
+
+          (* parenexpr ::= '(' expression ')' *)
+          | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+          (* identifierexpr
+           *   ::= identifier
+           *   ::= identifier '(' argumentexpr ')' *)
+          | [< 'Token.Ident id; stream >] ->
+              let rec parse_args accumulator = parser
+                | [< e=parse_expr; stream >] ->
+                    begin parser
+                      | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+                      | [< >] -> e :: accumulator
+                    end stream
+                | [< >] -> accumulator
+              in
+              let rec parse_ident id = parser
+                (* Call. *)
+                | [< 'Token.Kwd '(';
+                     args=parse_args [];
+                     'Token.Kwd ')' ?? "expected ')'">] ->
+                    Ast.Call (id, Array.of_list (List.rev args))
+
+                (* Simple variable ref. *)
+                | [< >] -> Ast.Variable id
+              in
+              parse_ident id stream
+
+          (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+          | [< 'Token.If; c=parse_expr;
+               'Token.Then ?? "expected 'then'"; t=parse_expr;
+               'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+              Ast.If (c, t, e)
+
+          (* forexpr
+                ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
+          | [< 'Token.For;
+               'Token.Ident id ?? "expected identifier after for";
+               'Token.Kwd '=' ?? "expected '=' after for";
+               stream >] ->
+              begin parser
+                | [<
+                     start=parse_expr;
+                     'Token.Kwd ',' ?? "expected ',' after for";
+                     end_=parse_expr;
+                     stream >] ->
+                    let step =
+                      begin parser
+                      | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+                      | [< >] -> None
+                      end stream
+                    in
+                    begin parser
+                    | [< 'Token.In; body=parse_expr >] ->
+                        Ast.For (id, start, end_, step, body)
+                    | [< >] ->
+                        raise (Stream.Error "expected 'in' after for")
+                    end stream
+                | [< >] ->
+                    raise (Stream.Error "expected '=' after for")
+              end stream
+
+          (* varexpr
+           *   ::= 'var' identifier ('=' expression?
+           *             (',' identifier ('=' expression)?)* 'in' expression *)
+          | [< 'Token.Var;
+               (* At least one variable name is required. *)
+               'Token.Ident id ?? "expected identifier after var";
+               init=parse_var_init;
+               var_names=parse_var_names [(id, init)];
+               (* At this point, we have to have 'in'. *)
+               'Token.In ?? "expected 'in' keyword after 'var'";
+               body=parse_expr >] ->
+              Ast.Var (Array.of_list (List.rev var_names), body)
+
+          | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+        (* unary
+         *   ::= primary
+         *   ::= '!' unary *)
+        and parse_unary = parser
+          (* If this is a unary operator, read it. *)
+          | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] ->
+              Ast.Unary (op, operand)
+
+          (* If the current token is not an operator, it must be a primary expr. *)
+          | [< stream >] -> parse_primary stream
+
+        (* binoprhs
+         *   ::= ('+' primary)* *)
+        and parse_bin_rhs expr_prec lhs stream =
+          match Stream.peek stream with
+          (* If this is a binop, find its precedence. *)
+          | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+              let token_prec = precedence c in
+
+              (* If this is a binop that binds at least as tightly as the current binop,
+               * consume it, otherwise we are done. *)
+              if token_prec < expr_prec then lhs else begin
+                (* Eat the binop. *)
+                Stream.junk stream;
+
+                (* Parse the primary expression after the binary operator. *)
+                let rhs = parse_unary stream in
+
+                (* Okay, we know this is a binop. *)
+                let rhs =
+                  match Stream.peek stream with
+                  | Some (Token.Kwd c2) ->
+                      (* If BinOp binds less tightly with rhs than the operator after
+                       * rhs, let the pending operator take rhs as its lhs. *)
+                      let next_prec = precedence c2 in
+                      if token_prec < next_prec
+                      then parse_bin_rhs (token_prec + 1) rhs stream
+                      else rhs
+                  | _ -> rhs
+                in
+
+                (* Merge lhs/rhs. *)
+                let lhs = Ast.Binary (c, lhs, rhs) in
+                parse_bin_rhs expr_prec lhs stream
+              end
+          | _ -> lhs
+
+        and parse_var_init = parser
+          (* read in the optional initializer. *)
+          | [< 'Token.Kwd '='; e=parse_expr >] -> Some e
+          | [< >] -> None
+
+        and parse_var_names accumulator = parser
+          | [< 'Token.Kwd ',';
+               'Token.Ident id ?? "expected identifier list after var";
+               init=parse_var_init;
+               e=parse_var_names ((id, init) :: accumulator) >] -> e
+          | [< >] -> accumulator
+
+        (* expression
+         *   ::= primary binoprhs *)
+        and parse_expr = parser
+          | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
+
+        (* prototype
+         *   ::= id '(' id* ')'
+         *   ::= binary LETTER number? (id, id)
+         *   ::= unary LETTER number? (id) *)
+        let parse_prototype =
+          let rec parse_args accumulator = parser
+            | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+            | [< >] -> accumulator
+          in
+          let parse_operator = parser
+            | [< 'Token.Unary >] -> "unary", 1
+            | [< 'Token.Binary >] -> "binary", 2
+          in
+          let parse_binary_precedence = parser
+            | [< 'Token.Number n >] -> int_of_float n
+            | [< >] -> 30
+          in
+          parser
+          | [< 'Token.Ident id;
+               'Token.Kwd '(' ?? "expected '(' in prototype";
+               args=parse_args [];
+               'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+              (* success. *)
+              Ast.Prototype (id, Array.of_list (List.rev args))
+          | [< (prefix, kind)=parse_operator;
+               'Token.Kwd op ?? "expected an operator";
+               (* Read the precedence if present. *)
+               binary_precedence=parse_binary_precedence;
+               'Token.Kwd '(' ?? "expected '(' in prototype";
+                args=parse_args [];
+               'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+              let name = prefix ^ (String.make 1 op) in
+              let args = Array.of_list (List.rev args) in
+
+              (* Verify right number of arguments for operator. *)
+              if Array.length args != kind
+              then raise (Stream.Error "invalid number of operands for operator")
+              else
+                if kind == 1 then
+                  Ast.Prototype (name, args)
+                else
+                  Ast.BinOpPrototype (name, args, binary_precedence)
+          | [< >] ->
+              raise (Stream.Error "expected function name in prototype")
+
+        (* definition ::= 'def' prototype expression *)
+        let parse_definition = parser
+          | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+              Ast.Function (p, e)
+
+        (* toplevelexpr ::= expression *)
+        let parse_toplevel = parser
+          | [< e=parse_expr >] ->
+              (* Make an anonymous proto. *)
+              Ast.Function (Ast.Prototype ("", [||]), e)
+
+        (*  external ::= 'extern' prototype *)
+        let parse_extern = parser
+          | [< 'Token.Extern; e=parse_prototype >] -> e
+
+codegen.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Code Generation
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+
+        exception Error of string
+
+        let context = global_context ()
+        let the_module = create_module context "my cool jit"
+        let builder = builder context
+        let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+        let double_type = double_type context
+
+        (* Create an alloca instruction in the entry block of the function. This
+         * is used for mutable variables etc. *)
+        let create_entry_block_alloca the_function var_name =
+          let builder = builder_at context (instr_begin (entry_block the_function)) in
+          build_alloca double_type var_name builder
+
+        let rec codegen_expr = function
+          | Ast.Number n -> const_float double_type n
+          | Ast.Variable name ->
+              let v = try Hashtbl.find named_values name with
+                | Not_found -> raise (Error "unknown variable name")
+              in
+              (* Load the value. *)
+              build_load v name builder
+          | Ast.Unary (op, operand) ->
+              let operand = codegen_expr operand in
+              let callee = "unary" ^ (String.make 1 op) in
+              let callee =
+                match lookup_function callee the_module with
+                | Some callee -> callee
+                | None -> raise (Error "unknown unary operator")
+              in
+              build_call callee [|operand|] "unop" builder
+          | Ast.Binary (op, lhs, rhs) ->
+              begin match op with
+              | '=' ->
+                  (* Special case '=' because we don't want to emit the LHS as an
+                   * expression. *)
+                  let name =
+                    match lhs with
+                    | Ast.Variable name -> name
+                    | _ -> raise (Error "destination of '=' must be a variable")
+                  in
+
+                  (* Codegen the rhs. *)
+                  let val_ = codegen_expr rhs in
+
+                  (* Lookup the name. *)
+                  let variable = try Hashtbl.find named_values name with
+                  | Not_found -> raise (Error "unknown variable name")
+                  in
+                  ignore(build_store val_ variable builder);
+                  val_
+              | _ ->
+                  let lhs_val = codegen_expr lhs in
+                  let rhs_val = codegen_expr rhs in
+                  begin
+                    match op with
+                    | '+' -> build_add lhs_val rhs_val "addtmp" builder
+                    | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+                    | '*' -> build_mul lhs_val rhs_val "multmp" builder
+                    | '<' ->
+                        (* Convert bool 0/1 to double 0.0 or 1.0 *)
+                        let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+                        build_uitofp i double_type "booltmp" builder
+                    | _ ->
+                        (* If it wasn't a builtin binary operator, it must be a user defined
+                         * one. Emit a call to it. *)
+                        let callee = "binary" ^ (String.make 1 op) in
+                        let callee =
+                          match lookup_function callee the_module with
+                          | Some callee -> callee
+                          | None -> raise (Error "binary operator not found!")
+                        in
+                        build_call callee [|lhs_val; rhs_val|] "binop" builder
+                  end
+              end
+          | Ast.Call (callee, args) ->
+              (* Look up the name in the module table. *)
+              let callee =
+                match lookup_function callee the_module with
+                | Some callee -> callee
+                | None -> raise (Error "unknown function referenced")
+              in
+              let params = params callee in
+
+              (* If argument mismatch error. *)
+              if Array.length params == Array.length args then () else
+                raise (Error "incorrect # arguments passed");
+              let args = Array.map codegen_expr args in
+              build_call callee args "calltmp" builder
+          | Ast.If (cond, then_, else_) ->
+              let cond = codegen_expr cond in
+
+              (* Convert condition to a bool by comparing equal to 0.0 *)
+              let zero = const_float double_type 0.0 in
+              let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
+
+              (* Grab the first block so that we might later add the conditional branch
+               * to it at the end of the function. *)
+              let start_bb = insertion_block builder in
+              let the_function = block_parent start_bb in
+
+              let then_bb = append_block context "then" the_function in
+
+              (* Emit 'then' value. *)
+              position_at_end then_bb builder;
+              let then_val = codegen_expr then_ in
+
+              (* Codegen of 'then' can change the current block, update then_bb for the
+               * phi. We create a new name because one is used for the phi node, and the
+               * other is used for the conditional branch. *)
+              let new_then_bb = insertion_block builder in
+
+              (* Emit 'else' value. *)
+              let else_bb = append_block context "else" the_function in
+              position_at_end else_bb builder;
+              let else_val = codegen_expr else_ in
+
+              (* Codegen of 'else' can change the current block, update else_bb for the
+               * phi. *)
+              let new_else_bb = insertion_block builder in
+
+              (* Emit merge block. *)
+              let merge_bb = append_block context "ifcont" the_function in
+              position_at_end merge_bb builder;
+              let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
+              let phi = build_phi incoming "iftmp" builder in
+
+              (* Return to the start block to add the conditional branch. *)
+              position_at_end start_bb builder;
+              ignore (build_cond_br cond_val then_bb else_bb builder);
+
+              (* Set a unconditional branch at the end of the 'then' block and the
+               * 'else' block to the 'merge' block. *)
+              position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
+              position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
+
+              (* Finally, set the builder to the end of the merge block. *)
+              position_at_end merge_bb builder;
+
+              phi
+          | Ast.For (var_name, start, end_, step, body) ->
+              (* Output this as:
+               *   var = alloca double
+               *   ...
+               *   start = startexpr
+               *   store start -> var
+               *   goto loop
+               * loop:
+               *   ...
+               *   bodyexpr
+               *   ...
+               * loopend:
+               *   step = stepexpr
+               *   endcond = endexpr
+               *
+               *   curvar = load var
+               *   nextvar = curvar + step
+               *   store nextvar -> var
+               *   br endcond, loop, endloop
+               * outloop: *)
+
+              let the_function = block_parent (insertion_block builder) in
+
+              (* Create an alloca for the variable in the entry block. *)
+              let alloca = create_entry_block_alloca the_function var_name in
+
+              (* Emit the start code first, without 'variable' in scope. *)
+              let start_val = codegen_expr start in
+
+              (* Store the value into the alloca. *)
+              ignore(build_store start_val alloca builder);
+
+              (* Make the new basic block for the loop header, inserting after current
+               * block. *)
+              let loop_bb = append_block context "loop" the_function in
+
+              (* Insert an explicit fall through from the current block to the
+               * loop_bb. *)
+              ignore (build_br loop_bb builder);
+
+              (* Start insertion in loop_bb. *)
+              position_at_end loop_bb builder;
+
+              (* Within the loop, the variable is defined equal to the PHI node. If it
+               * shadows an existing variable, we have to restore it, so save it
+               * now. *)
+              let old_val =
+                try Some (Hashtbl.find named_values var_name) with Not_found -> None
+              in
+              Hashtbl.add named_values var_name alloca;
+
+              (* Emit the body of the loop.  This, like any other expr, can change the
+               * current BB.  Note that we ignore the value computed by the body, but
+               * don't allow an error *)
+              ignore (codegen_expr body);
+
+              (* Emit the step value. *)
+              let step_val =
+                match step with
+                | Some step -> codegen_expr step
+                (* If not specified, use 1.0. *)
+                | None -> const_float double_type 1.0
+              in
+
+              (* Compute the end condition. *)
+              let end_cond = codegen_expr end_ in
+
+              (* Reload, increment, and restore the alloca. This handles the case where
+               * the body of the loop mutates the variable. *)
+              let cur_var = build_load alloca var_name builder in
+              let next_var = build_add cur_var step_val "nextvar" builder in
+              ignore(build_store next_var alloca builder);
+
+              (* Convert condition to a bool by comparing equal to 0.0. *)
+              let zero = const_float double_type 0.0 in
+              let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
+
+              (* Create the "after loop" block and insert it. *)
+              let after_bb = append_block context "afterloop" the_function in
+
+              (* Insert the conditional branch into the end of loop_end_bb. *)
+              ignore (build_cond_br end_cond loop_bb after_bb builder);
+
+              (* Any new code will be inserted in after_bb. *)
+              position_at_end after_bb builder;
+
+              (* Restore the unshadowed variable. *)
+              begin match old_val with
+              | Some old_val -> Hashtbl.add named_values var_name old_val
+              | None -> ()
+              end;
+
+              (* for expr always returns 0.0. *)
+              const_null double_type
+          | Ast.Var (var_names, body) ->
+              let old_bindings = ref [] in
+
+              let the_function = block_parent (insertion_block builder) in
+
+              (* Register all variables and emit their initializer. *)
+              Array.iter (fun (var_name, init) ->
+                (* Emit the initializer before adding the variable to scope, this
+                 * prevents the initializer from referencing the variable itself, and
+                 * permits stuff like this:
+                 *   var a = 1 in
+                 *     var a = a in ...   # refers to outer 'a'. *)
+                let init_val =
+                  match init with
+                  | Some init -> codegen_expr init
+                  (* If not specified, use 0.0. *)
+                  | None -> const_float double_type 0.0
+                in
+
+                let alloca = create_entry_block_alloca the_function var_name in
+                ignore(build_store init_val alloca builder);
+
+                (* Remember the old variable binding so that we can restore the binding
+                 * when we unrecurse. *)
+                begin
+                  try
+                    let old_value = Hashtbl.find named_values var_name in
+                    old_bindings := (var_name, old_value) :: !old_bindings;
+                  with Not_found -> ()
+                end;
+
+                (* Remember this binding. *)
+                Hashtbl.add named_values var_name alloca;
+              ) var_names;
+
+              (* Codegen the body, now that all vars are in scope. *)
+              let body_val = codegen_expr body in
+
+              (* Pop all our variables from scope. *)
+              List.iter (fun (var_name, old_value) ->
+                Hashtbl.add named_values var_name old_value
+              ) !old_bindings;
+
+              (* Return the body computation. *)
+              body_val
+
+        let codegen_proto = function
+          | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) ->
+              (* Make the function type: double(double,double) etc. *)
+              let doubles = Array.make (Array.length args) double_type in
+              let ft = function_type double_type doubles in
+              let f =
+                match lookup_function name the_module with
+                | None -> declare_function name ft the_module
+
+                (* If 'f' conflicted, there was already something named 'name'. If it
+                 * has a body, don't allow redefinition or reextern. *)
+                | Some f ->
+                    (* If 'f' already has a body, reject this. *)
+                    if block_begin f <> At_end f then
+                      raise (Error "redefinition of function");
+
+                    (* If 'f' took a different number of arguments, reject. *)
+                    if element_type (type_of f) <> ft then
+                      raise (Error "redefinition of function with different # args");
+                    f
+              in
+
+              (* Set names for all arguments. *)
+              Array.iteri (fun i a ->
+                let n = args.(i) in
+                set_value_name n a;
+                Hashtbl.add named_values n a;
+              ) (params f);
+              f
+
+        (* Create an alloca for each argument and register the argument in the symbol
+         * table so that references to it will succeed. *)
+        let create_argument_allocas the_function proto =
+          let args = match proto with
+            | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args
+          in
+          Array.iteri (fun i ai ->
+            let var_name = args.(i) in
+            (* Create an alloca for this variable. *)
+            let alloca = create_entry_block_alloca the_function var_name in
+
+            (* Store the initial value into the alloca. *)
+            ignore(build_store ai alloca builder);
+
+            (* Add arguments to variable symbol table. *)
+            Hashtbl.add named_values var_name alloca;
+          ) (params the_function)
+
+        let codegen_func the_fpm = function
+          | Ast.Function (proto, body) ->
+              Hashtbl.clear named_values;
+              let the_function = codegen_proto proto in
+
+              (* If this is an operator, install it. *)
+              begin match proto with
+              | Ast.BinOpPrototype (name, args, prec) ->
+                  let op = name.[String.length name - 1] in
+                  Hashtbl.add Parser.binop_precedence op prec;
+              | _ -> ()
+              end;
+
+              (* Create a new basic block to start insertion into. *)
+              let bb = append_block context "entry" the_function in
+              position_at_end bb builder;
+
+              try
+                (* Add all arguments to the symbol table and create their allocas. *)
+                create_argument_allocas the_function proto;
+
+                let ret_val = codegen_expr body in
+
+                (* Finish off the function. *)
+                let _ = build_ret ret_val builder in
+
+                (* Validate the generated code, checking for consistency. *)
+                Llvm_analysis.assert_valid_function the_function;
+
+                (* Optimize the function. *)
+                let _ = PassManager.run_function the_function the_fpm in
+
+                the_function
+              with e ->
+                delete_function the_function;
+                raise e
+
+toplevel.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Top-Level parsing and JIT Driver
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+        open Llvm_executionengine
+
+        (* top ::= definition | external | expression | ';' *)
+        let rec main_loop the_fpm the_execution_engine stream =
+          match Stream.peek stream with
+          | None -> ()
+
+          (* ignore top-level semicolons. *)
+          | Some (Token.Kwd ';') ->
+              Stream.junk stream;
+              main_loop the_fpm the_execution_engine stream
+
+          | Some token ->
+              begin
+                try match token with
+                | Token.Def ->
+                    let e = Parser.parse_definition stream in
+                    print_endline "parsed a function definition.";
+                    dump_value (Codegen.codegen_func the_fpm e);
+                | Token.Extern ->
+                    let e = Parser.parse_extern stream in
+                    print_endline "parsed an extern.";
+                    dump_value (Codegen.codegen_proto e);
+                | _ ->
+                    (* Evaluate a top-level expression into an anonymous function. *)
+                    let e = Parser.parse_toplevel stream in
+                    print_endline "parsed a top-level expr";
+                    let the_function = Codegen.codegen_func the_fpm e in
+                    dump_value the_function;
+
+                    (* JIT the function, returning a function pointer. *)
+                    let result = ExecutionEngine.run_function the_function [||]
+                      the_execution_engine in
+
+                    print_string "Evaluated to ";
+                    print_float (GenericValue.as_float Codegen.double_type result);
+                    print_newline ();
+                with Stream.Error s | Codegen.Error s ->
+                  (* Skip token for error recovery. *)
+                  Stream.junk stream;
+                  print_endline s;
+              end;
+              print_string "ready> "; flush stdout;
+              main_loop the_fpm the_execution_engine stream
+
+toy.ml:
+    .. code-block:: ocaml
+
+        (*===----------------------------------------------------------------------===
+         * Main driver code.
+         *===----------------------------------------------------------------------===*)
+
+        open Llvm
+        open Llvm_executionengine
+        open Llvm_target
+        open Llvm_scalar_opts
+
+        let main () =
+          ignore (initialize_native_target ());
+
+          (* Install standard binary operators.
+           * 1 is the lowest precedence. *)
+          Hashtbl.add Parser.binop_precedence '=' 2;
+          Hashtbl.add Parser.binop_precedence '<' 10;
+          Hashtbl.add Parser.binop_precedence '+' 20;
+          Hashtbl.add Parser.binop_precedence '-' 20;
+          Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+
+          (* Prime the first token. *)
+          print_string "ready> "; flush stdout;
+          let stream = Lexer.lex (Stream.of_channel stdin) in
+
+          (* Create the JIT. *)
+          let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+          let the_fpm = PassManager.create_function Codegen.the_module in
+
+          (* Set up the optimizer pipeline.  Start with registering info about how the
+           * target lays out data structures. *)
+          DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+          (* Promote allocas to registers. *)
+          add_memory_to_register_promotion the_fpm;
+
+          (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+          add_instruction_combination the_fpm;
+
+          (* reassociate expressions. *)
+          add_reassociation the_fpm;
+
+          (* Eliminate Common SubExpressions. *)
+          add_gvn the_fpm;
+
+          (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+          add_cfg_simplification the_fpm;
+
+          ignore (PassManager.initialize the_fpm);
+
+          (* Run the main "interpreter loop" now. *)
+          Toplevel.main_loop the_fpm the_execution_engine stream;
+
+          (* Print out all the generated code. *)
+          dump_module Codegen.the_module
+        ;;
+
+        main ()
+
+bindings.c
+    .. code-block:: c
+
+        #include <stdio.h>
+
+        /* putchard - putchar that takes a double and returns 0. */
+        extern double putchard(double X) {
+          putchar((char)X);
+          return 0;
+        }
+
+        /* printd - printf that takes a double prints it as "%f\n", returning 0. */
+        extern double printd(double X) {
+          printf("%f\n", X);
+          return 0;
+        }
+
+`Next: Conclusion and other useful LLVM tidbits <OCamlLangImpl8.html>`_
+
diff --git a/docs/tutorial/OCamlLangImpl8.html b/docs/tutorial/OCamlLangImpl8.html
deleted file mode 100644
index 7c1a500a21..0000000000
--- a/docs/tutorial/OCamlLangImpl8.html
+++ /dev/null
@@ -1,359 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-
-<html>
-<head>
-  <title>Kaleidoscope: Conclusion and other useful LLVM tidbits</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Chris Lattner">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>Kaleidoscope: Conclusion and other useful LLVM tidbits</h1>
-
-<ul>
-<li><a href="index.html">Up to Tutorial Index</a></li>
-<li>Chapter 8
-  <ol>
-    <li><a href="#conclusion">Tutorial Conclusion</a></li>
-    <li><a href="#llvmirproperties">Properties of LLVM IR</a>
-    <ul>
-      <li><a href="#targetindep">Target Independence</a></li>
-      <li><a href="#safety">Safety Guarantees</a></li>
-      <li><a href="#langspecific">Language-Specific Optimizations</a></li>
-    </ul>
-    </li>
-    <li><a href="#tipsandtricks">Tips and Tricks</a>
-    <ul>
-      <li><a href="#offsetofsizeof">Implementing portable 
-                                    offsetof/sizeof</a></li>
-      <li><a href="#gcstack">Garbage Collected Stack Frames</a></li>
-    </ul>
-    </li>
-  </ol>
-</li>
-</ul>
-
-
-<div class="doc_author">
-  <p>Written by <a href="mailto:sabre@nondot.org">Chris Lattner</a></p>
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="conclusion">Tutorial Conclusion</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>Welcome to the final chapter of the "<a href="index.html">Implementing a
-language with LLVM</a>" tutorial.  In the course of this tutorial, we have grown
-our little Kaleidoscope language from being a useless toy, to being a
-semi-interesting (but probably still useless) toy. :)</p>
-
-<p>It is interesting to see how far we've come, and how little code it has
-taken.  We built the entire lexer, parser, AST, code generator, and an 
-interactive run-loop (with a JIT!) by-hand in under 700 lines of
-(non-comment/non-blank) code.</p>
-
-<p>Our little language supports a couple of interesting features: it supports
-user defined binary and unary operators, it uses JIT compilation for immediate
-evaluation, and it supports a few control flow constructs with SSA construction.
-</p>
-
-<p>Part of the idea of this tutorial was to show you how easy and fun it can be
-to define, build, and play with languages.  Building a compiler need not be a
-scary or mystical process!  Now that you've seen some of the basics, I strongly
-encourage you to take the code and hack on it.  For example, try adding:</p>
-
-<ul>
-<li><b>global variables</b> - While global variables have questional value in
-modern software engineering, they are often useful when putting together quick
-little hacks like the Kaleidoscope compiler itself.  Fortunately, our current
-setup makes it very easy to add global variables: just have value lookup check
-to see if an unresolved variable is in the global variable symbol table before
-rejecting it.  To create a new global variable, make an instance of the LLVM
-<tt>GlobalVariable</tt> class.</li>
-
-<li><b>typed variables</b> - Kaleidoscope currently only supports variables of
-type double.  This gives the language a very nice elegance, because only
-supporting one type means that you never have to specify types.  Different
-languages have different ways of handling this.  The easiest way is to require
-the user to specify types for every variable definition, and record the type
-of the variable in the symbol table along with its Value*.</li>
-
-<li><b>arrays, structs, vectors, etc</b> - Once you add types, you can start
-extending the type system in all sorts of interesting ways.  Simple arrays are
-very easy and are quite useful for many different applications.  Adding them is
-mostly an exercise in learning how the LLVM <a 
-href="../LangRef.html#i_getelementptr">getelementptr</a> instruction works: it
-is so nifty/unconventional, it <a 
-href="../GetElementPtr.html">has its own FAQ</a>!  If you add support
-for recursive types (e.g. linked lists), make sure to read the <a 
-href="../ProgrammersManual.html#TypeResolve">section in the LLVM
-Programmer's Manual</a> that describes how to construct them.</li>
-
-<li><b>standard runtime</b> - Our current language allows the user to access
-arbitrary external functions, and we use it for things like "printd" and
-"putchard".  As you extend the language to add higher-level constructs, often
-these constructs make the most sense if they are lowered to calls into a
-language-supplied runtime.  For example, if you add hash tables to the language,
-it would probably make sense to add the routines to a runtime, instead of 
-inlining them all the way.</li>
-
-<li><b>memory management</b> - Currently we can only access the stack in
-Kaleidoscope.  It would also be useful to be able to allocate heap memory,
-either with calls to the standard libc malloc/free interface or with a garbage
-collector.  If you would like to use garbage collection, note that LLVM fully
-supports <a href="../GarbageCollection.html">Accurate Garbage Collection</a>
-including algorithms that move objects and need to scan/update the stack.</li>
-
-<li><b>debugger support</b> - LLVM supports generation of <a 
-href="../SourceLevelDebugging.html">DWARF Debug info</a> which is understood by
-common debuggers like GDB.  Adding support for debug info is fairly 
-straightforward.  The best way to understand it is to compile some C/C++ code
-with "<tt>llvm-gcc -g -O0</tt>" and taking a look at what it produces.</li>
-
-<li><b>exception handling support</b> - LLVM supports generation of <a 
-href="../ExceptionHandling.html">zero cost exceptions</a> which interoperate
-with code compiled in other languages.  You could also generate code by
-implicitly making every function return an error value and checking it.  You 
-could also make explicit use of setjmp/longjmp.  There are many different ways
-to go here.</li>
-
-<li><b>object orientation, generics, database access, complex numbers,
-geometric programming, ...</b> - Really, there is
-no end of crazy features that you can add to the language.</li>
-
-<li><b>unusual domains</b> - We've been talking about applying LLVM to a domain
-that many people are interested in: building a compiler for a specific language.
-However, there are many other domains that can use compiler technology that are
-not typically considered.  For example, LLVM has been used to implement OpenGL
-graphics acceleration, translate C++ code to ActionScript, and many other
-cute and clever things.  Maybe you will be the first to JIT compile a regular
-expression interpreter into native code with LLVM?</li>
-
-</ul>
-
-<p>
-Have fun - try doing something crazy and unusual.  Building a language like
-everyone else always has, is much less fun than trying something a little crazy
-or off the wall and seeing how it turns out.  If you get stuck or want to talk
-about it, feel free to email the <a 
-href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev mailing 
-list</a>: it has lots of people who are interested in languages and are often
-willing to help out.
-</p>
-
-<p>Before we end this tutorial, I want to talk about some "tips and tricks" for generating
-LLVM IR.  These are some of the more subtle things that may not be obvious, but
-are very useful if you want to take advantage of LLVM's capabilities.</p>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="llvmirproperties">Properties of the LLVM IR</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>We have a couple common questions about code in the LLVM IR form - lets just
-get these out of the way right now, shall we?</p>
-
-<!-- ======================================================================= -->
-<h4><a name="targetindep">Target Independence</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Kaleidoscope is an example of a "portable language": any program written in
-Kaleidoscope will work the same way on any target that it runs on.  Many other
-languages have this property, e.g. lisp, java, haskell, javascript, python, etc
-(note that while these languages are portable, not all their libraries are).</p>
-
-<p>One nice aspect of LLVM is that it is often capable of preserving target
-independence in the IR: you can take the LLVM IR for a Kaleidoscope-compiled 
-program and run it on any target that LLVM supports, even emitting C code and
-compiling that on targets that LLVM doesn't support natively.  You can trivially
-tell that the Kaleidoscope compiler generates target-independent code because it
-never queries for any target-specific information when generating code.</p>
-
-<p>The fact that LLVM provides a compact, target-independent, representation for
-code gets a lot of people excited.  Unfortunately, these people are usually
-thinking about C or a language from the C family when they are asking questions
-about language portability.  I say "unfortunately", because there is really no
-way to make (fully general) C code portable, other than shipping the source code
-around (and of course, C source code is not actually portable in general
-either - ever port a really old application from 32- to 64-bits?).</p>
-
-<p>The problem with C (again, in its full generality) is that it is heavily
-laden with target specific assumptions.  As one simple example, the preprocessor
-often destructively removes target-independence from the code when it processes
-the input text:</p>
-
-<div class="doc_code">
-<pre>
-#ifdef __i386__
-  int X = 1;
-#else
-  int X = 42;
-#endif
-</pre>
-</div>
-
-<p>While it is possible to engineer more and more complex solutions to problems
-like this, it cannot be solved in full generality in a way that is better than shipping
-the actual source code.</p>
-
-<p>That said, there are interesting subsets of C that can be made portable.  If
-you are willing to fix primitive types to a fixed size (say int = 32-bits, 
-and long = 64-bits), don't care about ABI compatibility with existing binaries,
-and are willing to give up some other minor features, you can have portable
-code.  This can make sense for specialized domains such as an
-in-kernel language.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="safety">Safety Guarantees</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Many of the languages above are also "safe" languages: it is impossible for
-a program written in Java to corrupt its address space and crash the process
-(assuming the JVM has no bugs).
-Safety is an interesting property that requires a combination of language
-design, runtime support, and often operating system support.</p>
-
-<p>It is certainly possible to implement a safe language in LLVM, but LLVM IR
-does not itself guarantee safety.  The LLVM IR allows unsafe pointer casts,
-use after free bugs, buffer over-runs, and a variety of other problems.  Safety
-needs to be implemented as a layer on top of LLVM and, conveniently, several
-groups have investigated this.  Ask on the <a 
-href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">llvmdev mailing 
-list</a> if you are interested in more details.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="langspecific">Language-Specific Optimizations</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>One thing about LLVM that turns off many people is that it does not solve all
-the world's problems in one system (sorry 'world hunger', someone else will have
-to solve you some other day).  One specific complaint is that people perceive
-LLVM as being incapable of performing high-level language-specific optimization:
-LLVM "loses too much information".</p>
-
-<p>Unfortunately, this is really not the place to give you a full and unified
-version of "Chris Lattner's theory of compiler design".  Instead, I'll make a
-few observations:</p>
-
-<p>First, you're right that LLVM does lose information.  For example, as of this
-writing, there is no way to distinguish in the LLVM IR whether an SSA-value came
-from a C "int" or a C "long" on an ILP32 machine (other than debug info).  Both
-get compiled down to an 'i32' value and the information about what it came from
-is lost.  The more general issue here, is that the LLVM type system uses
-"structural equivalence" instead of "name equivalence".  Another place this
-surprises people is if you have two types in a high-level language that have the
-same structure (e.g. two different structs that have a single int field): these
-types will compile down into a single LLVM type and it will be impossible to
-tell what it came from.</p>
-
-<p>Second, while LLVM does lose information, LLVM is not a fixed target: we 
-continue to enhance and improve it in many different ways.  In addition to
-adding new features (LLVM did not always support exceptions or debug info), we
-also extend the IR to capture important information for optimization (e.g.
-whether an argument is sign or zero extended, information about pointers
-aliasing, etc).  Many of the enhancements are user-driven: people want LLVM to
-include some specific feature, so they go ahead and extend it.</p>
-
-<p>Third, it is <em>possible and easy</em> to add language-specific
-optimizations, and you have a number of choices in how to do it.  As one trivial
-example, it is easy to add language-specific optimization passes that
-"know" things about code compiled for a language.  In the case of the C family,
-there is an optimization pass that "knows" about the standard C library
-functions.  If you call "exit(0)" in main(), it knows that it is safe to
-optimize that into "return 0;" because C specifies what the 'exit'
-function does.</p>
-
-<p>In addition to simple library knowledge, it is possible to embed a variety of
-other language-specific information into the LLVM IR.  If you have a specific
-need and run into a wall, please bring the topic up on the llvmdev list.  At the
-very worst, you can always treat LLVM as if it were a "dumb code generator" and
-implement the high-level optimizations you desire in your front-end, on the
-language-specific AST.
-</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<h2><a name="tipsandtricks">Tips and Tricks</a></h2>
-<!-- *********************************************************************** -->
-
-<div>
-
-<p>There is a variety of useful tips and tricks that you come to know after
-working on/with LLVM that aren't obvious at first glance.  Instead of letting
-everyone rediscover them, this section talks about some of these issues.</p>
-
-<!-- ======================================================================= -->
-<h4><a name="offsetofsizeof">Implementing portable offsetof/sizeof</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>One interesting thing that comes up, if you are trying to keep the code 
-generated by your compiler "target independent", is that you often need to know
-the size of some LLVM type or the offset of some field in an llvm structure.
-For example, you might need to pass the size of a type into a function that
-allocates memory.</p>
-
-<p>Unfortunately, this can vary widely across targets: for example the width of
-a pointer is trivially target-specific.  However, there is a <a 
-href="http://nondot.org/sabre/LLVMNotes/SizeOf-OffsetOf-VariableSizedStructs.txt">clever
-way to use the getelementptr instruction</a> that allows you to compute this
-in a portable way.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<h4><a name="gcstack">Garbage Collected Stack Frames</a></h4>
-<!-- ======================================================================= -->
-
-<div>
-
-<p>Some languages want to explicitly manage their stack frames, often so that
-they are garbage collected or to allow easy implementation of closures.  There
-are often better ways to implement these features than explicit stack frames,
-but <a 
-href="http://nondot.org/sabre/LLVMNotes/ExplicitlyManagedStackFrames.txt">LLVM
-does support them,</a> if you want.  It requires your front-end to convert the
-code into <a 
-href="http://en.wikipedia.org/wiki/Continuation-passing_style">Continuation
-Passing Style</a> and the use of tail calls (which LLVM also supports).</p>
-
-</div>
-
-</div>
-
-<!-- *********************************************************************** -->
-<hr>
-<address>
-  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
-  src="http://jigsaw.w3.org/css-validator/images/vcss" alt="Valid CSS!"></a>
-  <a href="http://validator.w3.org/check/referer"><img
-  src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"></a>
-
-  <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
-  <a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date$
-</address>
-</body>
-</html>
diff --git a/docs/tutorial/OCamlLangImpl8.rst b/docs/tutorial/OCamlLangImpl8.rst
new file mode 100644
index 0000000000..4058991f19
--- /dev/null
+++ b/docs/tutorial/OCamlLangImpl8.rst
@@ -0,0 +1,269 @@
+======================================================
+Kaleidoscope: Conclusion and other useful LLVM tidbits
+======================================================
+
+.. contents::
+   :local:
+
+Written by `Chris Lattner <mailto:sabre@nondot.org>`_
+
+Tutorial Conclusion
+===================
+
+Welcome to the final chapter of the "`Implementing a language with
+LLVM <index.html>`_" tutorial. In the course of this tutorial, we have
+grown our little Kaleidoscope language from being a useless toy, to
+being a semi-interesting (but probably still useless) toy. :)
+
+It is interesting to see how far we've come, and how little code it has
+taken. We built the entire lexer, parser, AST, code generator, and an
+interactive run-loop (with a JIT!) by-hand in under 700 lines of
+(non-comment/non-blank) code.
+
+Our little language supports a couple of interesting features: it
+supports user defined binary and unary operators, it uses JIT
+compilation for immediate evaluation, and it supports a few control flow
+constructs with SSA construction.
+
+Part of the idea of this tutorial was to show you how easy and fun it
+can be to define, build, and play with languages. Building a compiler
+need not be a scary or mystical process! Now that you've seen some of
+the basics, I strongly encourage you to take the code and hack on it.
+For example, try adding:
+
+-  **global variables** - While global variables have questional value
+   in modern software engineering, they are often useful when putting
+   together quick little hacks like the Kaleidoscope compiler itself.
+   Fortunately, our current setup makes it very easy to add global
+   variables: just have value lookup check to see if an unresolved
+   variable is in the global variable symbol table before rejecting it.
+   To create a new global variable, make an instance of the LLVM
+   ``GlobalVariable`` class.
+-  **typed variables** - Kaleidoscope currently only supports variables
+   of type double. This gives the language a very nice elegance, because
+   only supporting one type means that you never have to specify types.
+   Different languages have different ways of handling this. The easiest
+   way is to require the user to specify types for every variable
+   definition, and record the type of the variable in the symbol table
+   along with its Value\*.
+-  **arrays, structs, vectors, etc** - Once you add types, you can start
+   extending the type system in all sorts of interesting ways. Simple
+   arrays are very easy and are quite useful for many different
+   applications. Adding them is mostly an exercise in learning how the
+   LLVM `getelementptr <../LangRef.html#i_getelementptr>`_ instruction
+   works: it is so nifty/unconventional, it `has its own
+   FAQ <../GetElementPtr.html>`_! If you add support for recursive types
+   (e.g. linked lists), make sure to read the `section in the LLVM
+   Programmer's Manual <../ProgrammersManual.html#TypeResolve>`_ that
+   describes how to construct them.
+-  **standard runtime** - Our current language allows the user to access
+   arbitrary external functions, and we use it for things like "printd"
+   and "putchard". As you extend the language to add higher-level
+   constructs, often these constructs make the most sense if they are
+   lowered to calls into a language-supplied runtime. For example, if
+   you add hash tables to the language, it would probably make sense to
+   add the routines to a runtime, instead of inlining them all the way.
+-  **memory management** - Currently we can only access the stack in
+   Kaleidoscope. It would also be useful to be able to allocate heap
+   memory, either with calls to the standard libc malloc/free interface
+   or with a garbage collector. If you would like to use garbage
+   collection, note that LLVM fully supports `Accurate Garbage
+   Collection <../GarbageCollection.html>`_ including algorithms that
+   move objects and need to scan/update the stack.
+-  **debugger support** - LLVM supports generation of `DWARF Debug
+   info <../SourceLevelDebugging.html>`_ which is understood by common
+   debuggers like GDB. Adding support for debug info is fairly
+   straightforward. The best way to understand it is to compile some
+   C/C++ code with "``llvm-gcc -g -O0``" and taking a look at what it
+   produces.
+-  **exception handling support** - LLVM supports generation of `zero
+   cost exceptions <../ExceptionHandling.html>`_ which interoperate with
+   code compiled in other languages. You could also generate code by
+   implicitly making every function return an error value and checking
+   it. You could also make explicit use of setjmp/longjmp. There are
+   many different ways to go here.
+-  **object orientation, generics, database access, complex numbers,
+   geometric programming, ...** - Really, there is no end of crazy
+   features that you can add to the language.
+-  **unusual domains** - We've been talking about applying LLVM to a
+   domain that many people are interested in: building a compiler for a
+   specific language. However, there are many other domains that can use
+   compiler technology that are not typically considered. For example,
+   LLVM has been used to implement OpenGL graphics acceleration,
+   translate C++ code to ActionScript, and many other cute and clever
+   things. Maybe you will be the first to JIT compile a regular
+   expression interpreter into native code with LLVM?
+
+Have fun - try doing something crazy and unusual. Building a language
+like everyone else always has, is much less fun than trying something a
+little crazy or off the wall and seeing how it turns out. If you get
+stuck or want to talk about it, feel free to email the `llvmdev mailing
+list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_: it has lots
+of people who are interested in languages and are often willing to help
+out.
+
+Before we end this tutorial, I want to talk about some "tips and tricks"
+for generating LLVM IR. These are some of the more subtle things that
+may not be obvious, but are very useful if you want to take advantage of
+LLVM's capabilities.
+
+Properties of the LLVM IR
+=========================
+
+We have a couple common questions about code in the LLVM IR form - lets
+just get these out of the way right now, shall we?
+
+Target Independence
+-------------------
+
+Kaleidoscope is an example of a "portable language": any program written
+in Kaleidoscope will work the same way on any target that it runs on.
+Many other languages have this property, e.g. lisp, java, haskell,
+javascript, python, etc (note that while these languages are portable,
+not all their libraries are).
+
+One nice aspect of LLVM is that it is often capable of preserving target
+independence in the IR: you can take the LLVM IR for a
+Kaleidoscope-compiled program and run it on any target that LLVM
+supports, even emitting C code and compiling that on targets that LLVM
+doesn't support natively. You can trivially tell that the Kaleidoscope
+compiler generates target-independent code because it never queries for
+any target-specific information when generating code.
+
+The fact that LLVM provides a compact, target-independent,
+representation for code gets a lot of people excited. Unfortunately,
+these people are usually thinking about C or a language from the C
+family when they are asking questions about language portability. I say
+"unfortunately", because there is really no way to make (fully general)
+C code portable, other than shipping the source code around (and of
+course, C source code is not actually portable in general either - ever
+port a really old application from 32- to 64-bits?).
+
+The problem with C (again, in its full generality) is that it is heavily
+laden with target specific assumptions. As one simple example, the
+preprocessor often destructively removes target-independence from the
+code when it processes the input text:
+
+.. code-block:: c
+
+    #ifdef __i386__
+      int X = 1;
+    #else
+      int X = 42;
+    #endif
+
+While it is possible to engineer more and more complex solutions to
+problems like this, it cannot be solved in full generality in a way that
+is better than shipping the actual source code.
+
+That said, there are interesting subsets of C that can be made portable.
+If you are willing to fix primitive types to a fixed size (say int =
+32-bits, and long = 64-bits), don't care about ABI compatibility with
+existing binaries, and are willing to give up some other minor features,
+you can have portable code. This can make sense for specialized domains
+such as an in-kernel language.
+
+Safety Guarantees
+-----------------
+
+Many of the languages above are also "safe" languages: it is impossible
+for a program written in Java to corrupt its address space and crash the
+process (assuming the JVM has no bugs). Safety is an interesting
+property that requires a combination of language design, runtime
+support, and often operating system support.
+
+It is certainly possible to implement a safe language in LLVM, but LLVM
+IR does not itself guarantee safety. The LLVM IR allows unsafe pointer
+casts, use after free bugs, buffer over-runs, and a variety of other
+problems. Safety needs to be implemented as a layer on top of LLVM and,
+conveniently, several groups have investigated this. Ask on the `llvmdev
+mailing list <http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev>`_ if
+you are interested in more details.
+
+Language-Specific Optimizations
+-------------------------------
+
+One thing about LLVM that turns off many people is that it does not
+solve all the world's problems in one system (sorry 'world hunger',
+someone else will have to solve you some other day). One specific
+complaint is that people perceive LLVM as being incapable of performing
+high-level language-specific optimization: LLVM "loses too much
+information".
+
+Unfortunately, this is really not the place to give you a full and
+unified version of "Chris Lattner's theory of compiler design". Instead,
+I'll make a few observations:
+
+First, you're right that LLVM does lose information. For example, as of
+this writing, there is no way to distinguish in the LLVM IR whether an
+SSA-value came from a C "int" or a C "long" on an ILP32 machine (other
+than debug info). Both get compiled down to an 'i32' value and the
+information about what it came from is lost. The more general issue
+here, is that the LLVM type system uses "structural equivalence" instead
+of "name equivalence". Another place this surprises people is if you
+have two types in a high-level language that have the same structure
+(e.g. two different structs that have a single int field): these types
+will compile down into a single LLVM type and it will be impossible to
+tell what it came from.
+
+Second, while LLVM does lose information, LLVM is not a fixed target: we
+continue to enhance and improve it in many different ways. In addition
+to adding new features (LLVM did not always support exceptions or debug
+info), we also extend the IR to capture important information for
+optimization (e.g. whether an argument is sign or zero extended,
+information about pointers aliasing, etc). Many of the enhancements are
+user-driven: people want LLVM to include some specific feature, so they
+go ahead and extend it.
+
+Third, it is *possible and easy* to add language-specific optimizations,
+and you have a number of choices in how to do it. As one trivial
+example, it is easy to add language-specific optimization passes that
+"know" things about code compiled for a language. In the case of the C
+family, there is an optimization pass that "knows" about the standard C
+library functions. If you call "exit(0)" in main(), it knows that it is
+safe to optimize that into "return 0;" because C specifies what the
+'exit' function does.
+
+In addition to simple library knowledge, it is possible to embed a
+variety of other language-specific information into the LLVM IR. If you
+have a specific need and run into a wall, please bring the topic up on
+the llvmdev list. At the very worst, you can always treat LLVM as if it
+were a "dumb code generator" and implement the high-level optimizations
+you desire in your front-end, on the language-specific AST.
+
+Tips and Tricks
+===============
+
+There is a variety of useful tips and tricks that you come to know after
+working on/with LLVM that aren't obvious at first glance. Instead of
+letting everyone rediscover them, this section talks about some of these
+issues.
+
+Implementing portable offsetof/sizeof
+-------------------------------------
+
+One interesting thing that comes up, if you are trying to keep the code
+generated by your compiler "target independent", is that you often need
+to know the size of some LLVM type or the offset of some field in an
+llvm structure. For example, you might need to pass the size of a type
+into a function that allocates memory.
+
+Unfortunately, this can vary widely across targets: for example the
+width of a pointer is trivially target-specific. However, there is a
+`clever way to use the getelementptr
+instruction <http://nondot.org/sabre/LLVMNotes/SizeOf-OffsetOf-VariableSizedStructs.txt>`_
+that allows you to compute this in a portable way.
+
+Garbage Collected Stack Frames
+------------------------------
+
+Some languages want to explicitly manage their stack frames, often so
+that they are garbage collected or to allow easy implementation of
+closures. There are often better ways to implement these features than
+explicit stack frames, but `LLVM does support
+them, <http://nondot.org/sabre/LLVMNotes/ExplicitlyManagedStackFrames.txt>`_
+if you want. It requires your front-end to convert the code into
+`Continuation Passing
+Style <http://en.wikipedia.org/wiki/Continuation-passing_style>`_ and
+the use of tail calls (which LLVM also supports).
+
diff --git a/docs/tutorial/index.html b/docs/tutorial/index.html
deleted file mode 100644
index 2c11a9a48b..0000000000
--- a/docs/tutorial/index.html
+++ /dev/null
@@ -1,48 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-                      "http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
-  <title>LLVM Tutorial: Table of Contents</title>
-  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-  <meta name="author" content="Owen Anderson">
-  <meta name="description" 
-  content="LLVM Tutorial: Table of Contents.">
-  <link rel="stylesheet" href="../_static/llvm.css" type="text/css">
-</head>
-
-<body>
-
-<h1>LLVM Tutorial: Table of Contents</h1>
-
-<ol>
-  <li>Kaleidoscope: Implementing a Language with LLVM
-  <ol>
-    <li><a href="LangImpl1.html">Tutorial Introduction and the Lexer</a></li>
-    <li><a href="LangImpl2.html">Implementing a Parser and AST</a></li>
-    <li><a href="LangImpl3.html">Implementing Code Generation to LLVM IR</a></li>
-    <li><a href="LangImpl4.html">Adding JIT and Optimizer Support</a></li>
-    <li><a href="LangImpl5.html">Extending the language: control flow</a></li>
-    <li><a href="LangImpl6.html">Extending the language: user-defined operators</a></li>
-    <li><a href="LangImpl7.html">Extending the language: mutable variables / SSA construction</a></li>
-    <li><a href="LangImpl8.html">Conclusion and other useful LLVM tidbits</a></li>
-  </ol></li>
-  <li>Kaleidoscope: Implementing a Language with LLVM in Objective Caml
-  <ol>
-    <li><a href="OCamlLangImpl1.html">Tutorial Introduction and the Lexer</a></li>
-    <li><a href="OCamlLangImpl2.html">Implementing a Parser and AST</a></li>
-    <li><a href="OCamlLangImpl3.html">Implementing Code Generation to LLVM IR</a></li>
-    <li><a href="OCamlLangImpl4.html">Adding JIT and Optimizer Support</a></li>
-    <li><a href="OCamlLangImpl5.html">Extending the language: control flow</a></li>
-    <li><a href="OCamlLangImpl6.html">Extending the language: user-defined operators</a></li>
-    <li><a href="OCamlLangImpl7.html">Extending the language: mutable variables / SSA construction</a></li>
-    <li><a href="OCamlLangImpl8.html">Conclusion and other useful LLVM tidbits</a></li>
-  </ol></li>
-  <li>Advanced Topics
-  <ol>
-    <li><a href="http://llvm.org/pubs/2004-09-22-LCPCLLVMTutorial.html">Writing
-        an Optimization for LLVM</a></li>
-  </ol></li>
-</ol>
-
-</body>
-</html>
diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst
new file mode 100644
index 0000000000..4658f9619e
--- /dev/null
+++ b/docs/tutorial/index.rst
@@ -0,0 +1,30 @@
+================================
+LLVM Tutorial: Table of Contents
+================================
+
+Kaleidoscope: Implementing a Language with LLVM
+===============================================
+
+.. toctree::
+   :titlesonly:
+   :glob:
+   :numbered:
+
+   LangImpl*
+
+Kaleidoscope: Implementing a Language with LLVM in Objective Caml
+=================================================================
+
+.. toctree::
+   :titlesonly:
+   :glob:
+   :numbered:
+
+   OCamlLangImpl*
+
+
+Advanced Topics
+===============
+
+#. `Writing an Optimization for LLVM <http://llvm.org/pubs/2004-09-22-LCPCLLVMTutorial.html>`_
+
diff --git a/docs/userguides.rst b/docs/userguides.rst
index 8f184205c8..cfb6dbeb5e 100644
--- a/docs/userguides.rst
+++ b/docs/userguides.rst
@@ -21,6 +21,8 @@ User Guides
    SphinxQuickstartTemplate
    Phabricator
    TestingGuide
+   tutorial/index
+   ReleaseNotes
 
 * :ref:`getting_started`
     
@@ -37,13 +39,12 @@ User Guides
 
    Notes on building and testing LLVM/Clang on ARM.
 
-* `Getting Started with the LLVM System using Microsoft Visual Studio
-  <GettingStartedVS.html>`_
+* :doc:`GettingStartedVS`
 
    An addendum to the main Getting Started guide for those using Visual Studio
    on Windows.
     
-* `LLVM Tutorial <tutorial/>`_
+* :doc:`tutorial/index`
 
    A walk through the process of using LLVM for a custom language, and the
    facilities LLVM offers in tutorial form.
@@ -65,7 +66,7 @@ User Guides
 
    A list of common questions and problems and their solutions.
     
-* `Release notes for the current release <ReleaseNotes.html>`_
+* :doc:`Release notes for the current release <ReleaseNotes>`
 
    This describes new features, known bugs, and other limitations.
 
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp
index b002d1f496..5f023639c4 100644
--- a/examples/BrainF/BrainF.cpp
+++ b/examples/BrainF/BrainF.cpp
@@ -24,10 +24,10 @@
 //===--------------------------------------------------------------------===//
 
 #include "BrainF.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
-#include "llvm/ADT/STLExtras.h"
 #include <iostream>
 using namespace llvm;
 
diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp
index 58617b7f38..95b1a84458 100644
--- a/examples/BrainF/BrainFDriver.cpp
+++ b/examples/BrainF/BrainFDriver.cpp
@@ -25,17 +25,17 @@
 //===--------------------------------------------------------------------===//
 
 #include "BrainF.h"
-#include "llvm/Constants.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Constants.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/ExecutionEngine/JIT.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
-#include <iostream>
 #include <fstream>
+#include <iostream>
 using namespace llvm;
 
 //Command line options
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index 2d35ca73e8..91c2f5c0a1 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -48,20 +48,20 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/JIT.h"
 #include "llvm/IRBuilder.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
 
 // FIXME: Although all systems tested with (Linux, OS X), do not need this
 //        header file included. A user on ubuntu reported, undefined symbols
diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp
index 417ad6f4b6..9d21d24c76 100644
--- a/examples/Fibonacci/fibonacci.cpp
+++ b/examples/Fibonacci/fibonacci.cpp
@@ -23,17 +23,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
 #include "llvm/Analysis/Verifier.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 static Function *CreateFibFunction(Module *M, LLVMContext &Context) {
diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp
index 5588e923df..544288abc7 100644
--- a/examples/HowToUseJIT/HowToUseJIT.cpp
+++ b/examples/HowToUseJIT/HowToUseJIT.cpp
@@ -36,15 +36,15 @@
 
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/ExecutionEngine/Interpreter.h"
-#include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/examples/Kaleidoscope/Chapter2/toy.cpp b/examples/Kaleidoscope/Chapter2/toy.cpp
index f4f09d0b35..1cf6caacb6 100644
--- a/examples/Kaleidoscope/Chapter2/toy.cpp
+++ b/examples/Kaleidoscope/Chapter2/toy.cpp
@@ -1,7 +1,7 @@
 #include <cstdio>
 #include <cstdlib>
-#include <string>
 #include <map>
+#include <string>
 #include <vector>
 
 //===----------------------------------------------------------------------===//
diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp
index c1e34b2f09..6eeb7ac404 100644
--- a/examples/Kaleidoscope/Chapter3/toy.cpp
+++ b/examples/Kaleidoscope/Chapter3/toy.cpp
@@ -1,11 +1,11 @@
+#include "llvm/Analysis/Verifier.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/Analysis/Verifier.h"
 #include <cstdio>
-#include <string>
 #include <map>
+#include <string>
 #include <vector>
 using namespace llvm;
 
diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp
index bc6028c900..4a8f966e63 100644
--- a/examples/Kaleidoscope/Chapter4/toy.cpp
+++ b/examples/Kaleidoscope/Chapter4/toy.cpp
@@ -1,3 +1,6 @@
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/JIT.h"
@@ -5,14 +8,11 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Support/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
 #include <cstdio>
-#include <string>
 #include <map>
+#include <string>
 #include <vector>
 using namespace llvm;
 
diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp
index 2b0b9d54fe..cd005cdeca 100644
--- a/examples/Kaleidoscope/Chapter5/toy.cpp
+++ b/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -1,3 +1,6 @@
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/JIT.h"
@@ -5,14 +8,11 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Support/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
 #include <cstdio>
-#include <string>
 #include <map>
+#include <string>
 #include <vector>
 using namespace llvm;
 
diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp
index b751e3516b..a66f68b3fd 100644
--- a/examples/Kaleidoscope/Chapter6/toy.cpp
+++ b/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -1,3 +1,6 @@
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/JIT.h"
@@ -5,14 +8,11 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Support/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
 #include <cstdio>
-#include <string>
 #include <map>
+#include <string>
 #include <vector>
 using namespace llvm;
 
diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp
index 0ac0996590..6bc1e129ea 100644
--- a/examples/Kaleidoscope/Chapter7/toy.cpp
+++ b/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -1,3 +1,6 @@
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/JIT.h"
@@ -5,14 +8,11 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Support/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
 #include <cstdio>
-#include <string>
 #include <map>
+#include <string>
 #include <vector>
 using namespace llvm;
 
diff --git a/examples/ModuleMaker/ModuleMaker.cpp b/examples/ModuleMaker/ModuleMaker.cpp
index 6bc52c12a0..7ab38c1b2d 100644
--- a/examples/ModuleMaker/ModuleMaker.cpp
+++ b/examples/ModuleMaker/ModuleMaker.cpp
@@ -13,12 +13,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp
index 305cf1dde0..4c1eb49e16 100644
--- a/examples/ParallelJIT/ParallelJIT.cpp
+++ b/examples/ParallelJIT/ParallelJIT.cpp
@@ -17,17 +17,17 @@
 // call into the JIT at the same time (or the best possible approximation of the
 // same time). This test had assertion errors until I got the locking right.
 
-#include <pthread.h>
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/ExecutionEngine/Interpreter.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/Support/TargetSelect.h"
 #include <iostream>
+#include <pthread.h>
 using namespace llvm;
 
 static Function* createAdd1(Module *M) {
diff --git a/include/llvm-c/Disassembler.h b/include/llvm-c/Disassembler.h
index b8c4ad9ad7..f0872c1436 100644
--- a/include/llvm-c/Disassembler.h
+++ b/include/llvm-c/Disassembler.h
@@ -139,13 +139,26 @@ extern "C" {
  * by passing a block of information in the DisInfo parameter and specifying the
  * TagType and callback functions as described above.  These can all be passed
  * as NULL.  If successful, this returns a disassembler context.  If not, it
- * returns NULL.
+ * returns NULL. This function is equivalent to calling LLVMCreateDisasmCPU()
+ * with an empty CPU name.
  */
 LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
                                       int TagType, LLVMOpInfoCallback GetOpInfo,
                                       LLVMSymbolLookupCallback SymbolLookUp);
 
 /**
+ * Create a disassembler for the TripleName and a specific CPU.  Symbolic
+ * disassembly is supported by passing a block of information in the DisInfo
+ * parameter and specifying the TagType and callback functions as described
+ * above.  These can all be passed * as NULL.  If successful, this returns a
+ * disassembler context.  If not, it returns NULL.
+ */
+LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
+                                         void *DisInfo, int TagType,
+                                         LLVMOpInfoCallback GetOpInfo,
+                                         LLVMSymbolLookupCallback SymbolLookUp);
+
+/**
  * Set the disassembler's options.  Returns 1 if it can set the Options and 0
  * otherwise.
  */
@@ -153,6 +166,8 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DC, uint64_t Options);
 
 /* The option to produce marked up assembly. */
 #define LLVMDisassembler_Option_UseMarkup 1
+/* The option to print immediates as hex. */
+#define LLVMDisassembler_Option_PrintImmHex 2
 
 /**
  * Dispose of a disassembler context.
diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h
index 29668de465..691abdfcb4 100644
--- a/include/llvm-c/TargetMachine.h
+++ b/include/llvm-c/TargetMachine.h
@@ -20,6 +20,7 @@
 #define LLVM_C_TARGETMACHINE_H
 
 #include "llvm-c/Core.h"
+#include "llvm-c/Target.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index c7c8016b83..95cd23d2ce 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -274,7 +274,7 @@ public:
       initSlowCase(that);
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   /// @brief Move Constructor.
   APInt(APInt&& that) : BitWidth(that.BitWidth), VAL(that.VAL) {
     that.BitWidth = 0;
@@ -601,7 +601,7 @@ public:
     return AssignSlowCase(RHS);
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   /// @brief Move assignment operator.
   APInt& operator=(APInt&& that) {
     if (!isSingleWord())
diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h
index 9d6388f7ee..82cfdf437d 100644
--- a/include/llvm/ADT/BitVector.h
+++ b/include/llvm/ADT/BitVector.h
@@ -98,7 +98,7 @@ public:
     std::memcpy(Bits, RHS.Bits, Capacity * sizeof(BitWord));
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   BitVector(BitVector &&RHS)
     : Bits(RHS.Bits), Size(RHS.Size), Capacity(RHS.Capacity) {
     RHS.Bits = 0;
@@ -452,7 +452,7 @@ public:
     return *this;
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   const BitVector &operator=(BitVector &&RHS) {
     if (this == &RHS) return *this;
 
diff --git a/include/llvm/ADT/DAGDeltaAlgorithm.h b/include/llvm/ADT/DAGDeltaAlgorithm.h
index 2dfed075de..3dd862c8b2 100644
--- a/include/llvm/ADT/DAGDeltaAlgorithm.h
+++ b/include/llvm/ADT/DAGDeltaAlgorithm.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_ADT_DAGDELTAALGORITHM_H
 #define LLVM_ADT_DAGDELTAALGORITHM_H
 
-#include <vector>
 #include <set>
+#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/ADT/DeltaAlgorithm.h b/include/llvm/ADT/DeltaAlgorithm.h
index 7bf7960c63..4d07e04478 100644
--- a/include/llvm/ADT/DeltaAlgorithm.h
+++ b/include/llvm/ADT/DeltaAlgorithm.h
@@ -9,8 +9,8 @@
 #ifndef LLVM_ADT_DELTAALGORITHM_H
 #define LLVM_ADT_DELTAALGORITHM_H
 
-#include <vector>
 #include <set>
+#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index ac4bdbd126..31e685f96f 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -14,20 +14,20 @@
 #ifndef LLVM_ADT_DENSEMAP_H
 #define LLVM_ADT_DENSEMAP_H
 
-#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
 #include "llvm/Support/type_traits.h"
-#include "llvm/ADT/DenseMapInfo.h"
 #include <algorithm>
-#include <iterator>
-#include <new>
-#include <utility>
 #include <cassert>
 #include <climits>
 #include <cstddef>
 #include <cstring>
+#include <iterator>
+#include <new>
+#include <utility>
 
 namespace llvm {
 
@@ -198,7 +198,7 @@ public:
     return FindAndConstruct(Key).second;
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   value_type& FindAndConstruct(KeyT &&Key) {
     BucketT *TheBucket;
     if (LookupBucketFor(Key, TheBucket))
@@ -383,7 +383,7 @@ private:
     return TheBucket;
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   BucketT *InsertIntoBucket(const KeyT &Key, ValueT &&Value,
                             BucketT *TheBucket) {
     TheBucket = InsertIntoBucketImpl(Key, TheBucket);
@@ -536,7 +536,7 @@ public:
     copyFrom(other);
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   DenseMap(DenseMap &&other) {
     init(0);
     swap(other);
@@ -566,7 +566,7 @@ public:
     return *this;
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   DenseMap& operator=(DenseMap &&other) {
     this->destroyAll();
     operator delete(Buckets);
@@ -700,7 +700,7 @@ public:
     copyFrom(other);
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   SmallDenseMap(SmallDenseMap &&other) {
     init(0);
     swap(other);
@@ -795,7 +795,7 @@ public:
     return *this;
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   SmallDenseMap& operator=(SmallDenseMap &&other) {
     this->destroyAll();
     deallocateBuckets();
diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h
index 8ab9a33200..776863a4c0 100644
--- a/include/llvm/ADT/DenseSet.h
+++ b/include/llvm/ADT/DenseSet.h
@@ -32,6 +32,7 @@ public:
 
   bool empty() const { return TheMap.empty(); }
   unsigned size() const { return TheMap.size(); }
+  size_t getMemorySize() const { return TheMap.getMemorySize(); }
 
   /// Grow the denseset so that it has at least Size buckets. Does not shrink
   void resize(size_t Size) { TheMap.resize(Size); }
diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h
index 519b18052b..644544253a 100644
--- a/include/llvm/ADT/DepthFirstIterator.h
+++ b/include/llvm/ADT/DepthFirstIterator.h
@@ -34,8 +34,8 @@
 #define LLVM_ADT_DEPTHFIRSTITERATOR_H
 
 #include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include <set>
 #include <vector>
 
diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h
index 375d84abeb..91794dea69 100644
--- a/include/llvm/ADT/FoldingSet.h
+++ b/include/llvm/ADT/FoldingSet.h
@@ -16,9 +16,9 @@
 #ifndef LLVM_ADT_FOLDINGSET_H
 #define LLVM_ADT_FOLDINGSET_H
 
-#include "llvm/Support/DataTypes.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
   class APFloat;
diff --git a/include/llvm/ADT/ImmutableList.h b/include/llvm/ADT/ImmutableList.h
index 20bdd903f7..998d785dd7 100644
--- a/include/llvm/ADT/ImmutableList.h
+++ b/include/llvm/ADT/ImmutableList.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_ADT_IMLIST_H
 #define LLVM_ADT_IMLIST_H
 
-#include "llvm/Support/Allocator.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/DataTypes.h"
 #include <cassert>
 
diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h
index 4883c5ba0a..f9baec213a 100644
--- a/include/llvm/ADT/ImmutableMap.h
+++ b/include/llvm/ADT/ImmutableMap.h
@@ -288,6 +288,13 @@ public:
       Factory(F) {
     if (Root) { Root->retain(); }
   }
+
+  explicit ImmutableMapRef(const ImmutableMap<KeyT, ValT> &X,
+                           typename ImmutableMap<KeyT, ValT>::Factory &F)
+    : Root(X.getRootWithoutRetain()),
+      Factory(F.getTreeFactory()) {
+    if (Root) { Root->retain(); }
+  }
   
   ImmutableMapRef(const ImmutableMapRef &X)
     : Root(X.Root),
@@ -318,12 +325,20 @@ public:
     return ImmutableMapRef(0, F);
   }
 
-  ImmutableMapRef add(key_type_ref K, data_type_ref D) {
+  void manualRetain() {
+    if (Root) Root->retain();
+  }
+
+  void manualRelease() {
+    if (Root) Root->release();
+  }
+
+  ImmutableMapRef add(key_type_ref K, data_type_ref D) const {
     TreeTy *NewT = Factory->add(Root, std::pair<key_type, data_type>(K, D));
     return ImmutableMapRef(NewT, Factory);
   }
 
-  ImmutableMapRef remove(key_type_ref K) {
+  ImmutableMapRef remove(key_type_ref K) const {
     TreeTy *NewT = Factory->remove(Root, K);
     return ImmutableMapRef(NewT, Factory);
   }
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index 3900f96be1..0982657ccf 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_ADT_IMSET_H
 #define LLVM_ADT_IMSET_H
 
-#include "llvm/Support/Allocator.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cassert>
diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h
index 931b67e409..da6ee4297e 100644
--- a/include/llvm/ADT/IntervalMap.h
+++ b/include/llvm/ADT/IntervalMap.h
@@ -99,8 +99,8 @@
 #ifndef LLVM_ADT_INTERVALMAP_H
 #define LLVM_ADT_INTERVALMAP_H
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/RecyclingAllocator.h"
 #include <iterator>
diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h
index a9724ee154..58495035c4 100644
--- a/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -123,7 +123,7 @@ namespace llvm {
       retain();
     }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
     IntrusiveRefCntPtr(IntrusiveRefCntPtr&& S) : Obj(S.Obj) {
       S.Obj = 0;
     }
diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h
index f43aeb1bc4..aa43f552d5 100644
--- a/include/llvm/ADT/Optional.h
+++ b/include/llvm/ADT/Optional.h
@@ -19,7 +19,7 @@
 #include "llvm/Support/Compiler.h"
 #include <cassert>
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
 #include <utility>
 #endif
 
@@ -33,7 +33,7 @@ public:
   explicit Optional() : x(), hasVal(false) {}
   Optional(const T &y) : x(y), hasVal(true) {}
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   Optional(T &&y) : x(std::forward<T>(y)), hasVal(true) {}
 #endif
 
@@ -48,12 +48,17 @@ public:
   }
   
   const T* getPointer() const { assert(hasVal); return &x; }
-  const T& getValue() const { assert(hasVal); return x; }
+  const T& getValue() const LLVM_LVALUE_FUNCTION { assert(hasVal); return x; }
 
   operator bool() const { return hasVal; }
   bool hasValue() const { return hasVal; }
   const T* operator->() const { return getPointer(); }
-  const T& operator*() const { assert(hasVal); return x; }
+  const T& operator*() const LLVM_LVALUE_FUNCTION { assert(hasVal); return x; }
+
+#if LLVM_HAS_RVALUE_REFERENCE_THIS
+  T&& getValue() && { assert(hasVal); return std::move(x); }
+  T&& operator*() && { assert(hasVal); return std::move(x); } 
+#endif
 };
 
 template<typename T> struct simplify_type;
diff --git a/include/llvm/ADT/OwningPtr.h b/include/llvm/ADT/OwningPtr.h
index 05bcd40d08..ea229916f9 100644
--- a/include/llvm/ADT/OwningPtr.h
+++ b/include/llvm/ADT/OwningPtr.h
@@ -32,7 +32,7 @@ class OwningPtr {
 public:
   explicit OwningPtr(T *P = 0) : Ptr(P) {}
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   OwningPtr(OwningPtr &&Other) : Ptr(Other.take()) {}
 
   OwningPtr &operator=(OwningPtr &&Other) {
@@ -95,7 +95,7 @@ class OwningArrayPtr {
 public:
   explicit OwningArrayPtr(T *P = 0) : Ptr(P) {}
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   OwningArrayPtr(OwningArrayPtr &&Other) : Ptr(Other.take()) {}
 
   OwningArrayPtr &operator=(OwningArrayPtr &&Other) {
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index 71c379bad5..cce2efb6ac 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -57,11 +57,13 @@ class PointerIntPair {
   };
 public:
   PointerIntPair() : Value(0) {}
-  PointerIntPair(PointerTy Ptr, IntType Int) : Value(0) {
+  PointerIntPair(PointerTy Ptr, IntType Int) {
     assert(IntBits <= PtrTraits::NumLowBitsAvailable &&
            "PointerIntPair formed with integer size too large for pointer");
-    setPointer(Ptr);
-    setInt(Int);
+    setPointerAndInt(Ptr, Int);
+  }
+  explicit PointerIntPair(PointerTy Ptr) {
+    initWithPointer(Ptr);
   }
 
   PointerTy getPointer() const {
@@ -91,6 +93,25 @@ public:
     Value |= IntVal << IntShift;  // Set new integer.
   }
 
+  void initWithPointer(PointerTy Ptr) {
+    intptr_t PtrVal
+      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
+    assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
+           "Pointer is not sufficiently aligned");
+    Value = PtrVal;
+  }
+
+  void setPointerAndInt(PointerTy Ptr, IntType Int) {
+    intptr_t PtrVal
+      = reinterpret_cast<intptr_t>(PtrTraits::getAsVoidPointer(Ptr));
+    assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 &&
+           "Pointer is not sufficiently aligned");
+    intptr_t IntVal = Int;
+    assert(IntVal < (1 << IntBits) && "Integer too large for field");
+
+    Value = PtrVal | (IntVal << IntShift);
+  }
+
   PointerTy const *getAddrOfPointer() const {
     return const_cast<PointerIntPair *>(this)->getAddrOfPointer();
   }
diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h
index a9e86d2200..f42515ac77 100644
--- a/include/llvm/ADT/PointerUnion.h
+++ b/include/llvm/ADT/PointerUnion.h
@@ -95,15 +95,11 @@ namespace llvm {
   public:
     PointerUnion() {}
     
-    PointerUnion(PT1 V) {
-      Val.setPointer(
-         const_cast<void *>(PointerLikeTypeTraits<PT1>::getAsVoidPointer(V)));
-      Val.setInt(0);
+    PointerUnion(PT1 V) : Val(
+      const_cast<void *>(PointerLikeTypeTraits<PT1>::getAsVoidPointer(V))) {
     }
-    PointerUnion(PT2 V) {
-      Val.setPointer(
-         const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(V)));
-      Val.setInt(1);
+    PointerUnion(PT2 V) : Val(
+      const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(V)), 1) {
     }
     
     /// isNull - Return true if the pointer held in the union is null,
@@ -160,15 +156,14 @@ namespace llvm {
     /// Assignment operators - Allow assigning into this union from either
     /// pointer type, setting the discriminator to remember what it came from.
     const PointerUnion &operator=(const PT1 &RHS) {
-      Val.setPointer(
+      Val.initWithPointer(
          const_cast<void *>(PointerLikeTypeTraits<PT1>::getAsVoidPointer(RHS)));
-      Val.setInt(0);
       return *this;
     }
     const PointerUnion &operator=(const PT2 &RHS) {
-      Val.setPointer(
-        const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(RHS)));
-      Val.setInt(1);
+      Val.setPointerAndInt(
+        const_cast<void *>(PointerLikeTypeTraits<PT2>::getAsVoidPointer(RHS)),
+        1);
       return *this;
     }
     
diff --git a/include/llvm/ADT/SCCIterator.h b/include/llvm/ADT/SCCIterator.h
index 48436c6674..8ce4fd53ba 100644
--- a/include/llvm/ADT/SCCIterator.h
+++ b/include/llvm/ADT/SCCIterator.h
@@ -21,8 +21,8 @@
 #ifndef LLVM_ADT_SCCITERATOR_H
 #define LLVM_ADT_SCCITERATOR_H
 
-#include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/GraphTraits.h"
 #include <vector>
 
 namespace llvm {
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
index a9cd54e13b..62620fa267 100644
--- a/include/llvm/ADT/SmallBitVector.h
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -153,7 +153,7 @@ public:
       switchToLarge(new BitVector(*RHS.getPointer()));
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   SmallBitVector(SmallBitVector &&RHS) : X(RHS.X) {
     RHS.X = 1;
   }
@@ -472,7 +472,7 @@ public:
     return *this;
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   const SmallBitVector &operator=(SmallBitVector &&RHS) {
     if (this != &RHS) {
       clear();
diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h
index cd117f59ba..4eb7de895a 100644
--- a/include/llvm/ADT/SmallSet.h
+++ b/include/llvm/ADT/SmallSet.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_ADT_SMALLSET_H
 #define LLVM_ADT_SMALLSET_H
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include <set>
 
 namespace llvm {
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index e508f9df90..951e5741ff 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -178,7 +178,7 @@ protected:
   /// std::move, but not all stdlibs actually provide that.
   template<typename It1, typename It2>
   static It2 move(It1 I, It1 E, It2 Dest) {
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
     for (; I != E; ++I, ++Dest)
       *Dest = ::std::move(*I);
     return Dest;
@@ -193,7 +193,7 @@ protected:
   /// std::move_backward, but not all stdlibs actually provide that.
   template<typename It1, typename It2>
   static It2 move_backward(It1 I, It1 E, It2 Dest) {
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
     while (I != E)
       *--Dest = ::std::move(*--E);
     return Dest;
@@ -206,7 +206,7 @@ protected:
   /// memory starting with "Dest", constructing elements as needed.
   template<typename It1, typename It2>
   static void uninitialized_move(It1 I, It1 E, It2 Dest) {
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
     for (; I != E; ++I, ++Dest)
       ::new ((void*) &*Dest) T(::std::move(*I));
 #else
@@ -239,7 +239,7 @@ public:
     goto Retry;
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   void push_back(T &&Elt) {
     if (this->EndX < this->CapacityX) {
     Retry:
@@ -422,7 +422,7 @@ public:
   }
 
   T pop_back_val() {
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
     T Result = ::std::move(this->back());
 #else
     T Result = this->back();
@@ -495,7 +495,7 @@ public:
     return(N);
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   iterator insert(iterator I, T &&Elt) {
     if (I == this->end()) {  // Important special case for empty vector.
       this->push_back(::std::move(Elt));
@@ -667,7 +667,7 @@ public:
 
   SmallVectorImpl &operator=(const SmallVectorImpl &RHS);
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   SmallVectorImpl &operator=(SmallVectorImpl &&RHS);
 #endif
 
@@ -787,7 +787,7 @@ SmallVectorImpl<T> &SmallVectorImpl<T>::
   return *this;
 }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
 template <typename T>
 SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
   // Avoid self-assignment.
@@ -898,7 +898,7 @@ public:
     return *this;
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) {
     if (!RHS.empty())
       SmallVectorImpl<T>::operator=(::std::move(RHS));
diff --git a/include/llvm/ADT/SparseSet.h b/include/llvm/ADT/SparseSet.h
index 063c6755c6..267a340a75 100644
--- a/include/llvm/ADT/SparseSet.h
+++ b/include/llvm/ADT/SparseSet.h
@@ -20,8 +20,8 @@
 #ifndef LLVM_ADT_SPARSESET_H
 #define LLVM_ADT_SPARSESET_H
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/DataTypes.h"
 #include <limits>
 
diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h
index bf27c4313f..9503e0f805 100644
--- a/include/llvm/ADT/StringExtras.h
+++ b/include/llvm/ADT/StringExtras.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_ADT_STRINGEXTRAS_H
 #define LLVM_ADT_STRINGEXTRAS_H
 
-#include "llvm/Support/DataTypes.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 template<typename T> class SmallVectorImpl;
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index 292bde0cd9..0e93f51ef0 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -11,7 +11,6 @@
 #define LLVM_ADT_STRINGREF_H
 
 #include "llvm/Support/type_traits.h"
-
 #include <algorithm>
 #include <cassert>
 #include <cstring>
diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h
index d3d33b8add..cc0e7b6381 100644
--- a/include/llvm/ADT/TinyPtrVector.h
+++ b/include/llvm/ADT/TinyPtrVector.h
@@ -70,7 +70,7 @@ public:
     return *this;
   }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   TinyPtrVector(TinyPtrVector &&RHS) : Val(RHS.Val) {
     RHS.Val = (EltTy)0;
   }
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index f317c190b0..49d9f68404 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -100,8 +100,8 @@ public:
     Haiku,
     Minix,
     RTEMS,
-    NativeClient,
-    CNK,         // BG/P Compute-Node Kernel
+    NaCl,       // Native Client
+    CNK,        // BG/P Compute-Node Kernel
     Bitrig,
     AIX
   };
@@ -310,9 +310,9 @@ public:
     return getOS() == Triple::Win32 || isOSCygMing();
   }
 
-  /// \brief isOSNaCl - Is this the Native Client OS.
+  /// \brief Tests whether the OS is NaCl (Native Client)
   bool isOSNaCl() const {
-    return getOS() == Triple::NativeClient;
+    return getOS() == Triple::NaCl;
   }
 
   /// \brief Tests whether the OS uses the ELF binary format.
diff --git a/include/llvm/ADT/ValueMap.h b/include/llvm/ADT/ValueMap.h
index d23fccf3e8..b4fed7a0eb 100644
--- a/include/llvm/ADT/ValueMap.h
+++ b/include/llvm/ADT/ValueMap.h
@@ -27,10 +27,9 @@
 #define LLVM_ADT_VALUEMAP_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Mutex.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/type_traits.h"
-#include "llvm/Support/Mutex.h"
-
 #include <iterator>
 
 namespace llvm {
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index be274afd15..3ce4732eca 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -37,8 +37,8 @@
 #ifndef LLVM_ANALYSIS_ALIAS_ANALYSIS_H
 #define LLVM_ANALYSIS_ALIAS_ANALYSIS_H
 
-#include "llvm/Support/CallSite.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/CallSite.h"
 
 namespace llvm {
 
@@ -373,7 +373,7 @@ public:
     return getModRefInfo(I, Location(P, Size));
   }
 
-  /// getModRefInfo (for call sites) - Return whether information about whether
+  /// getModRefInfo (for call sites) - Return information about whether
   /// a particular call site modifies or reads the specified memory location.
   virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
                                      const Location &Loc);
@@ -384,7 +384,7 @@ public:
     return getModRefInfo(CS, Location(P, Size));
   }
 
-  /// getModRefInfo (for calls) - Return whether information about whether
+  /// getModRefInfo (for calls) - Return information about whether
   /// a particular call modifies or reads the specified memory location.
   ModRefResult getModRefInfo(const CallInst *C, const Location &Loc) {
     return getModRefInfo(ImmutableCallSite(C), Loc);
@@ -395,7 +395,7 @@ public:
     return getModRefInfo(C, Location(P, Size));
   }
 
-  /// getModRefInfo (for invokes) - Return whether information about whether
+  /// getModRefInfo (for invokes) - Return information about whether
   /// a particular invoke modifies or reads the specified memory location.
   ModRefResult getModRefInfo(const InvokeInst *I,
                              const Location &Loc) {
@@ -408,7 +408,7 @@ public:
     return getModRefInfo(I, Location(P, Size));
   }
 
-  /// getModRefInfo (for loads) - Return whether information about whether
+  /// getModRefInfo (for loads) - Return information about whether
   /// a particular load modifies or reads the specified memory location.
   ModRefResult getModRefInfo(const LoadInst *L, const Location &Loc);
 
@@ -417,7 +417,7 @@ public:
     return getModRefInfo(L, Location(P, Size));
   }
 
-  /// getModRefInfo (for stores) - Return whether information about whether
+  /// getModRefInfo (for stores) - Return information about whether
   /// a particular store modifies or reads the specified memory location.
   ModRefResult getModRefInfo(const StoreInst *S, const Location &Loc);
 
@@ -426,7 +426,7 @@ public:
     return getModRefInfo(S, Location(P, Size));
   }
 
-  /// getModRefInfo (for fences) - Return whether information about whether
+  /// getModRefInfo (for fences) - Return information about whether
   /// a particular store modifies or reads the specified memory location.
   ModRefResult getModRefInfo(const FenceInst *S, const Location &Loc) {
     // Conservatively correct.  (We could possibly be a bit smarter if
@@ -439,7 +439,7 @@ public:
     return getModRefInfo(S, Location(P, Size));
   }
 
-  /// getModRefInfo (for cmpxchges) - Return whether information about whether
+  /// getModRefInfo (for cmpxchges) - Return information about whether
   /// a particular cmpxchg modifies or reads the specified memory location.
   ModRefResult getModRefInfo(const AtomicCmpXchgInst *CX, const Location &Loc);
 
@@ -449,7 +449,7 @@ public:
     return getModRefInfo(CX, Location(P, Size));
   }
 
-  /// getModRefInfo (for atomicrmws) - Return whether information about whether
+  /// getModRefInfo (for atomicrmws) - Return information about whether
   /// a particular atomicrmw modifies or reads the specified memory location.
   ModRefResult getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc);
 
@@ -459,7 +459,7 @@ public:
     return getModRefInfo(RMW, Location(P, Size));
   }
 
-  /// getModRefInfo (for va_args) - Return whether information about whether
+  /// getModRefInfo (for va_args) - Return information about whether
   /// a particular va_arg modifies or reads the specified memory location.
   ModRefResult getModRefInfo(const VAArgInst* I, const Location &Loc);
 
@@ -587,9 +587,9 @@ bool isNoAliasCall(const Value *V);
 /// isIdentifiedObject - Return true if this pointer refers to a distinct and
 /// identifiable object.  This returns true for:
 ///    Global Variables and Functions (but not Global Aliases)
-///    Allocas and Mallocs
+///    Allocas
 ///    ByVal and NoAlias Arguments
-///    NoAlias returns
+///    NoAlias returns (e.g. calls to malloc)
 ///
 bool isIdentifiedObject(const Value *V);
 
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 1e606c81d9..163331c262 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -17,11 +17,11 @@
 #ifndef LLVM_ANALYSIS_ALIASSETTRACKER_H
 #define LLVM_ANALYSIS_ALIASSETTRACKER_H
 
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ValueHandle.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ValueHandle.h"
 #include <vector>
 
 namespace llvm {
diff --git a/include/llvm/Analysis/BlockFrequencyImpl.h b/include/llvm/Analysis/BlockFrequencyImpl.h
index 5168ab7872..c7b36846dd 100644
--- a/include/llvm/Analysis/BlockFrequencyImpl.h
+++ b/include/llvm/Analysis/BlockFrequencyImpl.h
@@ -14,17 +14,17 @@
 #ifndef LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H
 #define LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H
 
-#include "llvm/BasicBlock.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/BasicBlock.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Support/BlockFrequency.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include <vector>
 #include <string>
+#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index c0567daa3a..6c23f7c3ae 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -14,10 +14,10 @@
 #ifndef LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H
 #define LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H
 
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/BranchProbability.h"
 
 namespace llvm {
diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h
index 4704a929ac..2af7a0253e 100644
--- a/include/llvm/Analysis/CFGPrinter.h
+++ b/include/llvm/Analysis/CFGPrinter.h
@@ -15,10 +15,10 @@
 #ifndef LLVM_ANALYSIS_CFGPRINTER_H
 #define LLVM_ANALYSIS_CFGPRINTER_H
 
+#include "llvm/Assembly/Writer.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/GraphWriter.h"
 
diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h
index 6a9ed31037..d8b80df8fc 100644
--- a/include/llvm/Analysis/CallGraph.h
+++ b/include/llvm/Analysis/CallGraph.h
@@ -51,13 +51,13 @@
 #ifndef LLVM_ANALYSIS_CALLGRAPH_H
 #define LLVM_ANALYSIS_CALLGRAPH_H
 
-#include "llvm/Function.h"
-#include "llvm/Pass.h"
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/IncludeFile.h"
+#include "llvm/Support/ValueHandle.h"
 #include <map>
 
 namespace llvm {
diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h
index 2889269b95..6ebe2d77be 100644
--- a/include/llvm/Analysis/CaptureTracking.h
+++ b/include/llvm/Analysis/CaptureTracking.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_ANALYSIS_CAPTURETRACKING_H
 #define LLVM_ANALYSIS_CAPTURETRACKING_H
 
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Support/CallSite.h"
 
 namespace llvm {
diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h
index b701b8fca5..bd8c4a100f 100644
--- a/include/llvm/Analysis/DOTGraphTraitsPass.h
+++ b/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_ANALYSIS_DOT_GRAPHTRAITS_PASS_H
 #define LLVM_ANALYSIS_DOT_GRAPHTRAITS_PASS_H
 
-#include "llvm/Pass.h"
 #include "llvm/Analysis/CFGPrinter.h"
+#include "llvm/Pass.h"
 
 namespace llvm {
 template <class Analysis, bool Simple>
diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h
index 1983c00c57..850413faf3 100644
--- a/include/llvm/Analysis/DependenceAnalysis.h
+++ b/include/llvm/Analysis/DependenceAnalysis.h
@@ -40,9 +40,9 @@
 #ifndef LLVM_ANALYSIS_DEPENDENCEANALYSIS_H
 #define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H
 
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/SmallBitVector.h"
 
 namespace llvm {
   class AliasAnalysis;
diff --git a/include/llvm/Analysis/DominatorInternals.h b/include/llvm/Analysis/DominatorInternals.h
index 0c29236dde..c0f95cbd9b 100644
--- a/include/llvm/Analysis/DominatorInternals.h
+++ b/include/llvm/Analysis/DominatorInternals.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_ANALYSIS_DOMINATOR_INTERNALS_H
 #define LLVM_ANALYSIS_DOMINATOR_INTERNALS_H
 
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Dominators.h"
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h
index 8940971558..c69b1edec7 100644
--- a/include/llvm/Analysis/Dominators.h
+++ b/include/llvm/Analysis/Dominators.h
@@ -15,13 +15,13 @@
 #ifndef LLVM_ANALYSIS_DOMINATORS_H
 #define LLVM_ANALYSIS_DOMINATORS_H
 
-#include "llvm/Pass.h"
-#include "llvm/Function.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index 82a3a566c9..515cbce345 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -14,11 +14,11 @@
 #ifndef LLVM_ANALYSIS_INLINECOST_H
 #define LLVM_ANALYSIS_INLINECOST_H
 
-#include "llvm/Function.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/ValueMap.h"
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Function.h"
 #include <cassert>
 #include <climits>
 #include <vector>
diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h
index 6db400c563..e9b72a2fa7 100644
--- a/include/llvm/Analysis/InstructionSimplify.h
+++ b/include/llvm/Analysis/InstructionSimplify.h
@@ -25,6 +25,7 @@ namespace llvm {
   class DominatorTree;
   class Instruction;
   class DataLayout;
+  class FastMathFlags;
   class TargetLibraryInfo;
   class Type;
   class Value;
@@ -43,6 +44,14 @@ namespace llvm {
                          const TargetLibraryInfo *TLI = 0,
                          const DominatorTree *DT = 0);
 
+  /// Given operands for an FMul, see if we can fold the result.  If not, this
+  /// returns null.
+  Value *SimplifyFMulInst(Value *LHS, Value *RHS,
+                          FastMathFlags FMF,
+                          const DataLayout *TD = 0,
+                          const TargetLibraryInfo *TLI = 0,
+                          const DominatorTree *DT = 0);
+
   /// SimplifyMulInst - Given operands for a Mul, see if we can
   /// fold the result.  If not, this returns null.
   Value *SimplifyMulInst(Value *LHS, Value *RHS, const DataLayout *TD = 0,
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index c5d7b0128e..830754ded1 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -30,14 +30,14 @@
 #ifndef LLVM_ANALYSIS_LOOP_INFO_H
 #define LLVM_ANALYSIS_LOOP_INFO_H
 
-#include "llvm/Pass.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 3bb96f96bf..4b8e4c9f82 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -15,8 +15,8 @@
 #ifndef LLVM_ANALYSIS_LOOP_INFO_IMPL_H
 #define LLVM_ANALYSIS_LOOP_INFO_IMPL_H
 
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/LoopInfo.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h
index e6ed9bccee..4c16daffc0 100644
--- a/include/llvm/Analysis/LoopPass.h
+++ b/include/llvm/Analysis/LoopPass.h
@@ -16,9 +16,9 @@
 #define LLVM_LOOP_PASS_H
 
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Function.h"
 #include "llvm/Pass.h"
 #include "llvm/PassManagers.h"
-#include "llvm/Function.h"
 #include <deque>
 
 namespace llvm {
@@ -39,6 +39,9 @@ public:
   // whatever action is necessary for the specified Loop.
   virtual bool runOnLoop(Loop *L, LPPassManager &LPM) = 0;
 
+  using llvm::Pass::doInitialization;
+  using llvm::Pass::doFinalization;
+
   // Initialization and finalization hooks.
   virtual bool doInitialization(Loop *L, LPPassManager &LPM) {
     return false;
diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h
index a842898e41..a797374a59 100644
--- a/include/llvm/Analysis/MemoryBuiltins.h
+++ b/include/llvm/Analysis/MemoryBuiltins.h
@@ -15,12 +15,12 @@
 #ifndef LLVM_ANALYSIS_MEMORYBUILTINS_H
 #define LLVM_ANALYSIS_MEMORYBUILTINS_H
 
-#include "llvm/IRBuilder.h"
-#include "llvm/Operator.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/DataTypes.h"
-#include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/TargetFolder.h"
 #include "llvm/Support/ValueHandle.h"
 
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index a715eaeee1..121eede1c0 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -14,14 +14,14 @@
 #ifndef LLVM_ANALYSIS_MEMORY_DEPENDENCE_H
 #define LLVM_ANALYSIS_MEMORY_DEPENDENCE_H
 
-#include "llvm/BasicBlock.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/ValueHandle.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ValueHandle.h"
 
 namespace llvm {
   class Function;
diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h
index 5a77fcebaf..781ac07114 100644
--- a/include/llvm/Analysis/PHITransAddr.h
+++ b/include/llvm/Analysis/PHITransAddr.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_ANALYSIS_PHITRANSADDR_H
 #define LLVM_ANALYSIS_PHITRANSADDR_H
 
-#include "llvm/Instruction.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Instruction.h"
 
 namespace llvm {
   class DominatorTree;
diff --git a/include/llvm/Analysis/PathNumbering.h b/include/llvm/Analysis/PathNumbering.h
index 7025e28484..a9f76219f4 100644
--- a/include/llvm/Analysis/PathNumbering.h
+++ b/include/llvm/Analysis/PathNumbering.h
@@ -26,11 +26,11 @@
 #ifndef LLVM_PATH_NUMBERING_H
 #define LLVM_PATH_NUMBERING_H
 
+#include "llvm/Analysis/ProfileInfoTypes.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
 #include <map>
 #include <stack>
 #include <vector>
diff --git a/include/llvm/Analysis/PathProfileInfo.h b/include/llvm/Analysis/PathProfileInfo.h
index cef6d2d2a6..53880694f0 100644
--- a/include/llvm/Analysis/PathProfileInfo.h
+++ b/include/llvm/Analysis/PathProfileInfo.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_PATHPROFILEINFO_H
 #define LLVM_PATHPROFILEINFO_H
 
-#include "llvm/BasicBlock.h"
 #include "llvm/Analysis/PathNumbering.h"
+#include "llvm/BasicBlock.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Analysis/ProfileDataLoader.h b/include/llvm/Analysis/ProfileDataLoader.h
index 9efbafcef4..90097f7995 100644
--- a/include/llvm/Analysis/ProfileDataLoader.h
+++ b/include/llvm/Analysis/ProfileDataLoader.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_ANALYSIS_PROFILEDATALOADER_H
 #define LLVM_ANALYSIS_PROFILEDATALOADER_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"
diff --git a/include/llvm/Analysis/ProfileInfo.h b/include/llvm/Analysis/ProfileInfo.h
index 6c2e2732d3..5d17fa1220 100644
--- a/include/llvm/Analysis/ProfileInfo.h
+++ b/include/llvm/Analysis/ProfileInfo.h
@@ -26,9 +26,9 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
-#include <string>
 #include <map>
 #include <set>
+#include <string>
 
 namespace llvm {
   class Pass;
diff --git a/include/llvm/Analysis/ProfileInfoLoader.h b/include/llvm/Analysis/ProfileInfoLoader.h
index dcf3b38ddc..e0f49f3179 100644
--- a/include/llvm/Analysis/ProfileInfoLoader.h
+++ b/include/llvm/Analysis/ProfileInfoLoader.h
@@ -16,9 +16,9 @@
 #ifndef LLVM_ANALYSIS_PROFILEINFOLOADER_H
 #define LLVM_ANALYSIS_PROFILEINFOLOADER_H
 
-#include <vector>
 #include <string>
 #include <utility>
+#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/Analysis/PtrUseVisitor.h b/include/llvm/Analysis/PtrUseVisitor.h
new file mode 100644
index 0000000000..e15f2b45a8
--- /dev/null
+++ b/include/llvm/Analysis/PtrUseVisitor.h
@@ -0,0 +1,286 @@
+//===- PtrUseVisitor.h - InstVisitors over a pointers uses ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides a collection of visitors which walk the (instruction)
+/// uses of a pointer. These visitors all provide the same essential behavior
+/// as an InstVisitor with similar template-based flexibility and
+/// implementation strategies.
+///
+/// These can be used, for example, to quickly analyze the uses of an alloca,
+/// global variable, or function argument.
+///
+/// FIXME: Provide a variant which doesn't track offsets and is cheaper.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_PTRUSEVISITOR_H
+#define LLVM_ANALYSIS_PTRUSEVISITOR_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DataLayout.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+
+namespace llvm {
+
+namespace detail {
+/// \brief Implementation of non-dependent functionality for \c PtrUseVisitor.
+///
+/// See \c PtrUseVisitor for the public interface and detailed comments about
+/// usage. This class is just a helper base class which is not templated and
+/// contains all common code to be shared between different instantiations of
+/// PtrUseVisitor.
+class PtrUseVisitorBase {
+public:
+  /// \brief This class provides information about the result of a visit.
+  ///
+  /// After walking all the users (recursively) of a pointer, the basic
+  /// infrastructure records some commonly useful information such as escape
+  /// analysis and whether the visit completed or aborted early.
+  class PtrInfo {
+  public:
+    PtrInfo() : AbortedInfo(0, false), EscapedInfo(0, false) {}
+
+    /// \brief Reset the pointer info, clearing all state.
+    void reset() {
+      AbortedInfo.setPointer(0);
+      AbortedInfo.setInt(false);
+      EscapedInfo.setPointer(0);
+      EscapedInfo.setInt(false);
+    }
+
+    /// \brief Did we abort the visit early?
+    bool isAborted() const { return AbortedInfo.getInt(); }
+
+    /// \brief Is the pointer escaped at some point?
+    bool isEscaped() const { return EscapedInfo.getInt(); }
+
+    /// \brief Get the instruction causing the visit to abort.
+    /// \returns a pointer to the instruction causing the abort if one is
+    /// available; otherwise returns null.
+    Instruction *getAbortingInst() const { return AbortedInfo.getPointer(); }
+
+    /// \brief Get the instruction causing the pointer to escape.
+    /// \returns a pointer to the instruction which escapes the pointer if one
+    /// is available; otherwise returns null.
+    Instruction *getEscapingInst() const { return EscapedInfo.getPointer(); }
+
+    /// \brief Mark the visit as aborted. Intended for use in a void return.
+    /// \param I The instruction which caused the visit to abort, if available.
+    void setAborted(Instruction *I = 0) {
+      AbortedInfo.setInt(true);
+      AbortedInfo.setPointer(I);
+    }
+
+    /// \brief Mark the pointer as escaped. Intended for use in a void return.
+    /// \param I The instruction which escapes the pointer, if available.
+    void setEscaped(Instruction *I = 0) {
+      EscapedInfo.setInt(true);
+      EscapedInfo.setPointer(I);
+    }
+
+    /// \brief Mark the pointer as escaped, and the visit as aborted. Intended
+    /// for use in a void return.
+    /// \param I The instruction which both escapes the pointer and aborts the
+    /// visit, if available.
+    void setEscapedAndAborted(Instruction *I = 0) {
+      setEscaped(I);
+      setAborted(I);
+    }
+
+  private:
+    PointerIntPair<Instruction *, 1, bool> AbortedInfo, EscapedInfo;
+  };
+
+protected:
+  const DataLayout &DL;
+
+  /// \name Visitation infrastructure
+  /// @{
+
+  /// \brief The info collected about the pointer being visited thus far.
+  PtrInfo PI;
+
+  /// \brief A struct of the data needed to visit a particular use.
+  ///
+  /// This is used to maintain a worklist fo to-visit uses. This is used to
+  /// make the visit be iterative rather than recursive.
+  struct UseToVisit {
+    typedef PointerIntPair<Use *, 1, bool> UseAndIsOffsetKnownPair;
+    UseAndIsOffsetKnownPair UseAndIsOffsetKnown;
+    APInt Offset;
+  };
+
+  /// \brief The worklist of to-visit uses.
+  SmallVector<UseToVisit, 8> Worklist;
+
+  /// \brief A set of visited uses to break cycles in unreachable code.
+  SmallPtrSet<Use *, 8> VisitedUses;
+
+  /// @}
+
+
+  /// \name Per-visit state
+  /// This state is reset for each instruction visited.
+  /// @{
+
+  /// \brief The use currently being visited.
+  Use *U;
+
+  /// \brief True if we have a known constant offset for the use currently
+  /// being visited.
+  bool IsOffsetKnown;
+
+  /// \brief The constant offset of the use if that is known.
+  APInt Offset;
+
+  /// @}
+
+
+  /// Note that the constructor is protected because this class must be a base
+  /// class, we can't create instances directly of this class.
+  PtrUseVisitorBase(const DataLayout &DL) : DL(DL) {}
+
+  /// \brief Enqueue the users of this instruction in the visit worklist.
+  ///
+  /// This will visit the users with the same offset of the current visit
+  /// (including an unknown offset if that is the current state).
+  void enqueueUsers(Instruction &I);
+
+  /// \brief Walk the operands of a GEP and adjust the offset as appropriate.
+  ///
+  /// This routine does the heavy lifting of the pointer walk by computing
+  /// offsets and looking through GEPs.
+  bool adjustOffsetForGEP(GetElementPtrInst &GEPI);
+};
+} // end namespace detail
+
+/// \brief A base class for visitors over the uses of a pointer value.
+///
+/// Once constructed, a user can call \c visit on a pointer value, and this
+/// will walk its uses and visit each instruction using an InstVisitor. It also
+/// provides visit methods which will recurse through any pointer-to-pointer
+/// transformations such as GEPs and bitcasts.
+///
+/// During the visit, the current Use* being visited is available to the
+/// subclass, as well as the current offset from the original base pointer if
+/// known.
+///
+/// The recursive visit of uses is accomplished with a worklist, so the only
+/// ordering guarantee is that an instruction is visited before any uses of it
+/// are visited. Note that this does *not* mean before any of its users are
+/// visited! This is because users can be visited multiple times due to
+/// multiple, different uses of pointers derived from the same base.
+///
+/// A particular Use will only be visited once, but a User may be visited
+/// multiple times, once per Use. This visits may notably have different
+/// offsets.
+///
+/// All visit methods on the underlying InstVisitor return a boolean. This
+/// return short-circuits the visit, stopping it immediately.
+///
+/// FIXME: Generalize this for all values rather than just instructions.
+template <typename DerivedT>
+class PtrUseVisitor : protected InstVisitor<DerivedT>,
+                      public detail::PtrUseVisitorBase {
+  friend class InstVisitor<DerivedT>;
+  typedef InstVisitor<DerivedT> Base;
+
+public:
+  PtrUseVisitor(const DataLayout &DL) : PtrUseVisitorBase(DL) {}
+
+  /// \brief Recursively visit the uses of the given pointer.
+  /// \returns An info struct about the pointer. See \c PtrInfo for details.
+  PtrInfo visitPtr(Instruction &I) {
+    // This must be a pointer type. Get an integer type suitable to hold
+    // offsets on this pointer.
+    // FIXME: Support a vector of pointers.
+    assert(I.getType()->isPointerTy());
+    IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(I.getType()));
+    IsOffsetKnown = true;
+    Offset = APInt(IntPtrTy->getBitWidth(), 0);
+    PI.reset();
+
+    // Enqueue the uses of this pointer.
+    enqueueUsers(I);
+
+    // Visit all the uses off the worklist until it is empty.
+    while (!Worklist.empty()) {
+      UseToVisit ToVisit = Worklist.pop_back_val();
+      U = ToVisit.UseAndIsOffsetKnown.getPointer();
+      IsOffsetKnown = ToVisit.UseAndIsOffsetKnown.getInt();
+      if (IsOffsetKnown)
+        Offset = llvm_move(ToVisit.Offset);
+
+      Instruction *I = cast<Instruction>(U->getUser());
+      static_cast<DerivedT*>(this)->visit(I);
+      if (PI.isAborted())
+        break;
+    }
+    return PI;
+  }
+
+protected:
+  void visitStoreInst(StoreInst &SI) {
+    if (SI.getValueOperand() == U->get())
+      PI.setEscaped(&SI);
+  }
+
+  void visitBitCastInst(BitCastInst &BC) {
+    enqueueUsers(BC);
+  }
+
+  void visitPtrToIntInst(PtrToIntInst &I) {
+    PI.setEscaped(&I);
+  }
+
+  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+    if (GEPI.use_empty())
+      return;
+
+    // If we can't walk the GEP, clear the offset.
+    if (!adjustOffsetForGEP(GEPI)) {
+      IsOffsetKnown = false;
+      Offset = APInt();
+    }
+
+    // Enqueue the users now that the offset has been adjusted.
+    enqueueUsers(GEPI);
+  }
+
+  // No-op intrinsics which we know don't escape the pointer to to logic in
+  // some other function.
+  void visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) {}
+  void visitMemIntrinsic(MemIntrinsic &I) {}
+  void visitIntrinsicInst(IntrinsicInst &II) {
+    switch (II.getIntrinsicID()) {
+    default:
+      return Base::visitIntrinsicInst(II);
+
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+      return; // No-op intrinsics.
+    }
+  }
+
+  // Generically, arguments to calls and invokes escape the pointer to some
+  // other function. Mark that.
+  void visitCallSite(CallSite CS) {
+    PI.setEscaped(CS.getInstruction());
+    Base::visitCallSite(CS);
+  }
+};
+
+}
+
+#endif
diff --git a/include/llvm/Analysis/RegionIterator.h b/include/llvm/Analysis/RegionIterator.h
index 7adc71ca82..bcff2276d0 100644
--- a/include/llvm/Analysis/RegionIterator.h
+++ b/include/llvm/Analysis/RegionIterator.h
@@ -12,8 +12,8 @@
 #define LLVM_ANALYSIS_REGION_ITERATOR_H
 
 #include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/include/llvm/Analysis/RegionPass.h b/include/llvm/Analysis/RegionPass.h
index 68f12012bc..7d450887f0 100644
--- a/include/llvm/Analysis/RegionPass.h
+++ b/include/llvm/Analysis/RegionPass.h
@@ -17,11 +17,9 @@
 #define LLVM_REGION_PASS_H
 
 #include "llvm/Analysis/RegionInfo.h"
-
+#include "llvm/Function.h"
 #include "llvm/Pass.h"
 #include "llvm/PassManagers.h"
-#include "llvm/Function.h"
-
 #include <deque>
 
 namespace llvm {
@@ -59,6 +57,9 @@ public:
   /// @return The pass to print the LLVM IR in the region.
   Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
 
+  using llvm::Pass::doInitialization;
+  using llvm::Pass::doFinalization;
+
   virtual bool doInitialization(Region *R, RGPassManager &RGM) { return false; }
   virtual bool doFinalization() { return false; }
   //@}
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 235adca021..dcefaa04e0 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -21,16 +21,16 @@
 #ifndef LLVM_ANALYSIS_SCALAREVOLUTION_H
 #define LLVM_ANALYSIS_SCALAREVOLUTION_H
 
-#include "llvm/Pass.h"
-#include "llvm/Instructions.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
 #include "llvm/Function.h"
+#include "llvm/Instructions.h"
 #include "llvm/Operator.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/ValueHandle.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/ConstantRange.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ValueHandle.h"
 #include <map>
 
 namespace llvm {
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 3f8f149cb4..95772a887f 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H
 #define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H
 
-#include "llvm/IRBuilder.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ScalarEvolutionNormalization.h"
+#include "llvm/IRBuilder.h"
 #include "llvm/Support/TargetFolder.h"
 #include "llvm/Support/ValueHandle.h"
 #include <set>
diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 54db7d6bcf..b74cb33285 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_ANALYSIS_SCALAREVOLUTION_EXPRESSIONS_H
 #define LLVM_ANALYSIS_SCALAREVOLUTION_EXPRESSIONS_H
 
-#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Support/ErrorHandling.h"
 
 namespace llvm {
diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h
index b758eca42e..604e30666e 100644
--- a/include/llvm/Analysis/SparsePropagation.h
+++ b/include/llvm/Analysis/SparsePropagation.h
@@ -17,8 +17,8 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
-#include <vector>
 #include <set>
+#include <vector>
 
 namespace llvm {
   class Value;
diff --git a/include/llvm/Analysis/Trace.h b/include/llvm/Analysis/Trace.h
index 99651e192d..26947fe34f 100644
--- a/include/llvm/Analysis/Trace.h
+++ b/include/llvm/Analysis/Trace.h
@@ -18,8 +18,8 @@
 #ifndef LLVM_ANALYSIS_TRACE_H
 #define LLVM_ANALYSIS_TRACE_H
 
-#include <vector>
 #include <cassert>
+#include <vector>
 
 namespace llvm {
   class BasicBlock;
diff --git a/include/llvm/Argument.h b/include/llvm/Argument.h
index b1c2218519..dc9df34c60 100644
--- a/include/llvm/Argument.h
+++ b/include/llvm/Argument.h
@@ -14,10 +14,10 @@
 #ifndef LLVM_ARGUMENT_H
 #define LLVM_ARGUMENT_H
 
-#include "llvm/Value.h"
-#include "llvm/Attributes.h"
-#include "llvm/ADT/ilist_node.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/Attributes.h"
+#include "llvm/Value.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index 6c352de2a9..ed18ca4c40 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -15,8 +15,8 @@
 #ifndef LLVM_ATTRIBUTES_H
 #define LLVM_ATTRIBUTES_H
 
-#include "llvm/Support/MathExtras.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/MathExtras.h"
 #include <cassert>
 #include <string>
 
@@ -298,14 +298,14 @@ struct AttributeWithIndex {
 };
 
 //===----------------------------------------------------------------------===//
-// AttrListPtr Smart Pointer
+// AttributeSet Smart Pointer
 //===----------------------------------------------------------------------===//
 
 class AttributeListImpl;
 
-/// AttrListPtr - This class manages the ref count for the opaque
+/// AttributeSet - This class manages the ref count for the opaque
 /// AttributeListImpl object and provides accessors for it.
-class AttrListPtr {
+class AttributeSet {
 public:
   enum AttrIndex {
     ReturnIndex = 0U,
@@ -320,28 +320,28 @@ private:
   /// for the result are denoted with Idx = 0.
   Attributes getAttributes(unsigned Idx) const;
 
-  explicit AttrListPtr(AttributeListImpl *LI) : AttrList(LI) {}
+  explicit AttributeSet(AttributeListImpl *LI) : AttrList(LI) {}
 public:
-  AttrListPtr() : AttrList(0) {}
-  AttrListPtr(const AttrListPtr &P) : AttrList(P.AttrList) {}
-  const AttrListPtr &operator=(const AttrListPtr &RHS);
+  AttributeSet() : AttrList(0) {}
+  AttributeSet(const AttributeSet &P) : AttrList(P.AttrList) {}
+  const AttributeSet &operator=(const AttributeSet &RHS);
 
   //===--------------------------------------------------------------------===//
   // Attribute List Construction and Mutation
   //===--------------------------------------------------------------------===//
 
   /// get - Return a Attributes list with the specified parameters in it.
-  static AttrListPtr get(LLVMContext &C, ArrayRef<AttributeWithIndex> Attrs);
+  static AttributeSet get(LLVMContext &C, ArrayRef<AttributeWithIndex> Attrs);
 
   /// addAttr - Add the specified attribute at the specified index to this
   /// attribute list.  Since attribute lists are immutable, this
   /// returns the new list.
-  AttrListPtr addAttr(LLVMContext &C, unsigned Idx, Attributes Attrs) const;
+  AttributeSet addAttr(LLVMContext &C, unsigned Idx, Attributes Attrs) const;
 
   /// removeAttr - Remove the specified attribute at the specified index from
   /// this attribute list.  Since attribute lists are immutable, this
   /// returns the new list.
-  AttrListPtr removeAttr(LLVMContext &C, unsigned Idx, Attributes Attrs) const;
+  AttributeSet removeAttr(LLVMContext &C, unsigned Idx, Attributes Attrs) const;
 
   //===--------------------------------------------------------------------===//
   // Attribute List Accessors
@@ -383,9 +383,9 @@ public:
   Attributes &getAttributesAtIndex(unsigned i) const;
 
   /// operator==/!= - Provide equality predicates.
-  bool operator==(const AttrListPtr &RHS) const
+  bool operator==(const AttributeSet &RHS) const
   { return AttrList == RHS.AttrList; }
-  bool operator!=(const AttrListPtr &RHS) const
+  bool operator!=(const AttributeSet &RHS) const
   { return AttrList != RHS.AttrList; }
 
   //===--------------------------------------------------------------------===//
diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h
index 02c2a96b6c..ba8406e397 100644
--- a/include/llvm/BasicBlock.h
+++ b/include/llvm/BasicBlock.h
@@ -14,11 +14,11 @@
 #ifndef LLVM_BASICBLOCK_H
 #define LLVM_BASICBLOCK_H
 
-#include "llvm/Instruction.h"
-#include "llvm/SymbolTableListTraits.h"
-#include "llvm/ADT/ilist.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/Instruction.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/SymbolTableListTraits.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h
index 2e8c9f46b8..1021fbd0dc 100644
--- a/include/llvm/Bitcode/BitstreamWriter.h
+++ b/include/llvm/Bitcode/BitstreamWriter.h
@@ -15,8 +15,8 @@
 #ifndef BITSTREAM_WRITER_H
 #define BITSTREAM_WRITER_H
 
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Bitcode/BitCodes.h"
 #include <vector>
 
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index 511e3a377a..601777d11a 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -54,6 +54,8 @@ namespace bitc {
     MODULE_CODE_DATALAYOUT  = 3,    // DATALAYOUT:  [strchr x N]
     MODULE_CODE_ASM         = 4,    // ASM:         [strchr x N]
     MODULE_CODE_SECTIONNAME = 5,    // SECTIONNAME: [strchr x N]
+
+    // FIXME: Remove DEPLIB in 4.0.
     MODULE_CODE_DEPLIB      = 6,    // DEPLIB:      [strchr x N]
 
     // GLOBALVAR: [pointer type, isconst, initid,
diff --git a/include/llvm/CallGraphSCCPass.h b/include/llvm/CallGraphSCCPass.h
index 7154aa3259..4446777669 100644
--- a/include/llvm/CallGraphSCCPass.h
+++ b/include/llvm/CallGraphSCCPass.h
@@ -21,8 +21,8 @@
 #ifndef LLVM_CALL_GRAPH_SCC_PASS_H
 #define LLVM_CALL_GRAPH_SCC_PASS_H
 
-#include "llvm/Pass.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/Pass.h"
 
 namespace llvm {
 
@@ -39,6 +39,9 @@ public:
   /// corresponding to a CallGraph.
   Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
 
+  using llvm::Pass::doInitialization;
+  using llvm::Pass::doFinalization;
+
   /// doInitialization - This method is called before the SCC's of the program
   /// has been processed, allowing the pass to do initialization as necessary.
   virtual bool doInitialization(CallGraph &CG) {
diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h
index 0b609ed658..4ff0be758a 100644
--- a/include/llvm/CodeGen/Analysis.h
+++ b/include/llvm/CodeGen/Analysis.h
@@ -14,12 +14,12 @@
 #ifndef LLVM_CODEGEN_ANALYSIS_H
 #define LLVM_CODEGEN_ANALYSIS_H
 
-#include "llvm/Instructions.h"
-#include "llvm/InlineAsm.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
 #include "llvm/Support/CallSite.h"
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h
index 2f76a6cc55..9cd2decfac 100644
--- a/include/llvm/CodeGen/CalcSpillWeights.h
+++ b/include/llvm/CodeGen/CalcSpillWeights.h
@@ -11,8 +11,8 @@
 #ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H
 #define LLVM_CODEGEN_CALCSPILLWEIGHTS_H
 
-#include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SlotIndexes.h"
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index c1bc3aba5e..064a92ef36 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -16,11 +16,11 @@
 #define LLVM_CODEGEN_CALLINGCONVLOWER_H
 
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CallingConv.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Target/TargetCallingConv.h"
-#include "llvm/CallingConv.h"
 
 namespace llvm {
   class TargetRegisterInfo;
diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h
index 90ee234244..e7a654b423 100644
--- a/include/llvm/CodeGen/CommandFlags.h
+++ b/include/llvm/CodeGen/CommandFlags.h
@@ -16,10 +16,9 @@
 #ifndef LLVM_CODEGEN_COMMAND_LINE_FLAGS_H
 #define LLVM_CODEGEN_COMMAND_LINE_FLAGS_H
 
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetMachine.h"
-
 #include <string>
 using namespace llvm;
 
diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h
index e4386fc8e2..9d25fd377b 100644
--- a/include/llvm/CodeGen/DFAPacketizer.h
+++ b/include/llvm/CodeGen/DFAPacketizer.h
@@ -26,8 +26,8 @@
 #ifndef LLVM_CODEGEN_DFAPACKETIZER_H
 #define LLVM_CODEGEN_DFAPACKETIZER_H
 
-#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include <map>
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 7c24e36092..05619050a3 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -15,8 +15,8 @@
 #define LLVM_CODEGEN_FASTISEL_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/ValueTypes.h"
 
 namespace llvm {
 
@@ -131,6 +131,10 @@ public:
   /// into the current block.
   void recomputeInsertPt();
 
+  /// removeDeadCode - Remove all dead instructions between the I and E.
+  void removeDeadCode(MachineBasicBlock::iterator I,
+                      MachineBasicBlock::iterator E);
+
   struct SavePoint {
     MachineBasicBlock::iterator InsertPt;
     DebugLoc DL;
@@ -395,10 +399,6 @@ private:
 
   /// hasTrivialKill - Test whether the given value has exactly one use.
   bool hasTrivialKill(const Value *V) const;
-
-  /// removeDeadCode - Remove all dead instructions between the I and E.
-  void removeDeadCode(MachineBasicBlock::iterator I,
-                      MachineBasicBlock::iterator E);
 };
 
 }
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index 8cf22eca4f..789f77f26e 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -15,8 +15,6 @@
 #ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
 #define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
 
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
@@ -24,9 +22,11 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include <vector>
diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h
index 076f6f39fe..fa40049dbd 100644
--- a/include/llvm/CodeGen/GCMetadata.h
+++ b/include/llvm/CodeGen/GCMetadata.h
@@ -33,9 +33,9 @@
 #ifndef LLVM_CODEGEN_GCMETADATA_H
 #define LLVM_CODEGEN_GCMETADATA_H
 
-#include "llvm/Pass.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/DebugLoc.h"
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
index f95b8b6b84..e14d04e422 100644
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ b/include/llvm/CodeGen/JITCodeEmitter.h
@@ -17,11 +17,11 @@
 #ifndef LLVM_CODEGEN_JITCODEEMITTER_H
 #define LLVM_CODEGEN_JITCODEEMITTER_H
 
-#include <string>
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/CodeGen/MachineCodeEmitter.h"
-#include "llvm/ADT/DenseMap.h"
+#include <string>
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h
index e1911cfd82..bb22cb74c5 100644
--- a/include/llvm/CodeGen/LexicalScopes.h
+++ b/include/llvm/CodeGen/LexicalScopes.h
@@ -17,11 +17,11 @@
 #ifndef LLVM_CODEGEN_LEXICALSCOPES_H
 #define LLVM_CODEGEN_LEXICALSCOPES_H
 
-#include "llvm/Metadata.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Metadata.h"
 #include "llvm/Support/DebugLoc.h"
 #include "llvm/Support/ValueHandle.h"
 #include <utility>
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 46dd004609..2145eb80bd 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -15,9 +15,9 @@
 #ifndef LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H
 #define LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H
 
+#include "llvm/CodeGen/GCs.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/CodeGen/GCs.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cstdlib>
 
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 185e414ae2..95760758de 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -22,9 +22,9 @@
 #define LLVM_CODEGEN_LIVEINTERVAL_H
 
 #include "llvm/ADT/IntEqClasses.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/AlignOf.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
 #include <cassert>
 #include <climits>
 
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index b421753dd5..9e89519b28 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -20,16 +20,16 @@
 #ifndef LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H
 #define LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H
 
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <cmath>
 #include <iterator>
 
@@ -347,6 +347,10 @@ namespace llvm {
       return RegUnitIntervals[Unit];
     }
 
+    const LiveInterval *getCachedRegUnit(unsigned Unit) const {
+      return RegUnitIntervals[Unit];
+    }
+
   private:
     /// computeIntervals - Compute live intervals.
     void computeIntervals();
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/include/llvm/CodeGen/LiveIntervalUnion.h
index 4d41fca85a..6a61614df4 100644
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ b/include/llvm/CodeGen/LiveIntervalUnion.h
@@ -22,7 +22,6 @@
 
 namespace llvm {
 
-class MachineLoopRange;
 class TargetRegisterInfo;
 
 #ifndef NDEBUG
@@ -173,10 +172,6 @@ public:
       return InterferingVRegs;
     }
 
-    /// checkLoopInterference - Return true if there is interference overlapping
-    /// Loop.
-    bool checkLoopInterference(MachineLoopRange*);
-
   private:
     Query(const Query&) LLVM_DELETED_FUNCTION;
     void operator=(const Query&) LLVM_DELETED_FUNCTION;
diff --git a/lib/CodeGen/LiveRegMatrix.h b/include/llvm/CodeGen/LiveRegMatrix.h
index 8f22c24478..7a3e9e8347 100644
--- a/lib/CodeGen/LiveRegMatrix.h
+++ b/include/llvm/CodeGen/LiveRegMatrix.h
@@ -24,9 +24,9 @@
 #ifndef LLVM_CODEGEN_LIVEREGMATRIX_H
 #define LLVM_CODEGEN_LIVEREGMATRIX_H
 
-#include "LiveIntervalUnion.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStackAnalysis.h
index 86c4d7c110..a3b1855bbc 100644
--- a/include/llvm/CodeGen/LiveStackAnalysis.h
+++ b/include/llvm/CodeGen/LiveStackAnalysis.h
@@ -16,10 +16,10 @@
 #ifndef LLVM_CODEGEN_LIVESTACK_ANALYSIS_H
 #define LLVM_CODEGEN_LIVESTACK_ANALYSIS_H
 
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <map>
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index 3bb134b8fb..3c5ed9297e 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -29,16 +29,16 @@
 #ifndef LLVM_CODEGEN_LIVEVARIABLES_H
 #define LLVM_CODEGEN_LIVEVARIABLES_H
 
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index 97c39458d9..81912742fa 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_CODEGEN_MACHINEBASICBLOCK_H
 #define LLVM_CODEGEN_MACHINEBASICBLOCK_H
 
-#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/ADT/GraphTraits.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/Support/DataTypes.h"
 #include <functional>
 
diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 12189ceb7f..1c9bdd954c 100644
--- a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -15,8 +15,8 @@
 #ifndef LLVM_CODEGEN_MACHINEBRANCHPROBABILITYINFO_H
 #define LLVM_CODEGEN_MACHINEBRANCHPROBABILITYINFO_H
 
-#include "llvm/Pass.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/BranchProbability.h"
 #include <climits>
 
diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h
index 86e8f27877..9e41e6e9c1 100644
--- a/include/llvm/CodeGen/MachineCodeEmitter.h
+++ b/include/llvm/CodeGen/MachineCodeEmitter.h
@@ -19,7 +19,6 @@
 
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/DebugLoc.h"
-
 #include <string>
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h
index 82a4ac821b..40b2542fd6 100644
--- a/include/llvm/CodeGen/MachineDominators.h
+++ b/include/llvm/CodeGen/MachineDominators.h
@@ -15,11 +15,11 @@
 #ifndef LLVM_CODEGEN_MACHINEDOMINATORS_H
 #define LLVM_CODEGEN_MACHINEDOMINATORS_H
 
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/DominatorInternals.h"
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 0e4e132e40..93d77287d7 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -221,8 +221,11 @@ class MachineFrameInfo {
   /// just allocate them normally.
   bool UseLocalStackAllocationBlock;
 
+  /// Whether the "realign-stack" option is on.
+  bool RealignOption;
 public:
-    explicit MachineFrameInfo(const TargetFrameLowering &tfi) : TFI(tfi) {
+    explicit MachineFrameInfo(const TargetFrameLowering &tfi, bool RealignOpt)
+    : TFI(tfi), RealignOption(RealignOpt) {
     StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0;
     HasVarSizedObjects = false;
     FrameAddressTaken = false;
@@ -432,9 +435,7 @@ public:
 
   /// ensureMaxAlignment - Make sure the function is at least Align bytes
   /// aligned.
-  void ensureMaxAlignment(unsigned Align) {
-    if (MaxAlignment < Align) MaxAlignment = Align;
-  }
+  void ensureMaxAlignment(unsigned Align);
 
   /// AdjustsStack - Return true if this function adjusts the stack -- e.g.,
   /// when calling another function. This is only valid during and after
@@ -496,26 +497,13 @@ public:
   /// a nonnegative identifier to represent it.
   ///
   int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS,
-                        bool MayNeedSP = false, const AllocaInst *Alloca = 0) {
-    assert(Size != 0 && "Cannot allocate zero size stack objects!");
-    Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP,
-                                  Alloca));
-    int Index = (int)Objects.size() - NumFixedObjects - 1;
-    assert(Index >= 0 && "Bad frame index!");
-    ensureMaxAlignment(Alignment);
-    return Index;
-  }
+                        bool MayNeedSP = false, const AllocaInst *Alloca = 0);
 
   /// CreateSpillStackObject - Create a new statically sized stack object that
   /// represents a spill slot, returning a nonnegative identifier to represent
   /// it.
   ///
-  int CreateSpillStackObject(uint64_t Size, unsigned Alignment) {
-    CreateStackObject(Size, Alignment, true, false);
-    int Index = (int)Objects.size() - NumFixedObjects - 1;
-    ensureMaxAlignment(Alignment);
-    return Index;
-  }
+  int CreateSpillStackObject(uint64_t Size, unsigned Alignment);
 
   /// RemoveStackObject - Remove or mark dead a statically sized stack object.
   ///
@@ -529,12 +517,7 @@ public:
   /// variable sized object is created, whether or not the index returned is
   /// actually used.
   ///
-  int CreateVariableSizedObject(unsigned Alignment) {
-    HasVarSizedObjects = true;
-    Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0));
-    ensureMaxAlignment(Alignment);
-    return (int)Objects.size()-NumFixedObjects-1;
-  }
+  int CreateVariableSizedObject(unsigned Alignment);
 
   /// getCalleeSavedInfo - Returns a reference to call saved info vector for the
   /// current function.
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index 025e18a9dd..bb5b23b00c 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -18,10 +18,10 @@
 #ifndef LLVM_CODEGEN_MACHINEFUNCTION_H
 #define LLVM_CODEGEN_MACHINEFUNCTION_H
 
-#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/ADT/ilist.h"
-#include "llvm/Support/DebugLoc.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Support/DebugLoc.h"
 #include "llvm/Support/Recycler.h"
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index 7eb03a9301..57da779ca1 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -16,17 +16,17 @@
 #ifndef LLVM_CODEGEN_MACHINEINSTR_H
 #define LLVM_CODEGEN_MACHINEINSTR_H
 
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/Target/TargetOpcodes.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/InlineAsm.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Support/DebugLoc.h"
+#include "llvm/Target/TargetOpcodes.h"
 #include <vector>
 
 namespace llvm {
@@ -58,8 +58,8 @@ public:
     NoFlags      = 0,
     FrameSetup   = 1 << 0,              // Instruction is used as a part of
                                         // function frame setup code.
-    InsideBundle = 1 << 1               // Instruction is inside a bundle (not
-                                        // the first MI in a bundle)
+    BundledPred  = 1 << 1,              // Instruction has bundled predecessors.
+    BundledSucc  = 1 << 2               // Instruction has bundled successors.
   };
 private:
   const MCInstrDesc *MCID;              // Instruction descriptor.
@@ -94,21 +94,11 @@ private:
   /// MachineInstr in the given MachineFunction.
   MachineInstr(MachineFunction &, const MachineInstr &);
 
-  /// MachineInstr ctor - This constructor creates a dummy MachineInstr with
-  /// MCID NULL and no operands.
-  MachineInstr();
-
   /// MachineInstr ctor - This constructor create a MachineInstr and add the
   /// implicit operands.  It reserves space for number of operands specified by
   /// MCInstrDesc.  An explicit DebugLoc is supplied.
   MachineInstr(const MCInstrDesc &MCID, const DebugLoc dl, bool NoImp = false);
 
-  /// MachineInstr ctor - Work exactly the same as the ctor above, except that
-  /// the MachineInstr is created and added to the end of the specified basic
-  /// block.
-  MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
-               const MCInstrDesc &MCID);
-
   ~MachineInstr();
 
   // MachineInstrs are pool-allocated and owned by MachineFunction.
@@ -205,22 +195,44 @@ public:
   /// The first instruction has the special opcode "BUNDLE". It's not "inside"
   /// a bundle, but the next three MIs are.
   bool isInsideBundle() const {
-    return getFlag(InsideBundle);
+    return getFlag(BundledPred);
   }
 
   /// setIsInsideBundle - Set InsideBundle bit.
   ///
   void setIsInsideBundle(bool Val = true) {
     if (Val)
-      setFlag(InsideBundle);
+      setFlag(BundledPred);
     else
-      clearFlag(InsideBundle);
+      clearFlag(BundledPred);
   }
 
   /// isBundled - Return true if this instruction part of a bundle. This is true
   /// if either itself or its following instruction is marked "InsideBundle".
   bool isBundled() const;
 
+  /// Return true if this instruction is part of a bundle, and it is not the
+  /// first instruction in the bundle.
+  bool isBundledWithPred() const { return getFlag(BundledPred); }
+
+  /// Return true if this instruction is part of a bundle, and it is not the
+  /// last instruction in the bundle.
+  bool isBundledWithSucc() const { return getFlag(BundledSucc); }
+
+  /// Bundle this instruction with its predecessor. This can be an unbundled
+  /// instruction, or it can be the first instruction in a bundle.
+  void bundleWithPred();
+
+  /// Bundle this instruction with its successor. This can be an unbundled
+  /// instruction, or it can be the last instruction in a bundle.
+  void bundleWithSucc();
+
+  /// Break bundle above this instruction.
+  void unbundleFromPred();
+
+  /// Break bundle below this instruction.
+  void unbundleFromSucc();
+
   /// getDebugLoc - Returns the debug location id of this MachineInstr.
   ///
   DebugLoc getDebugLoc() const { return debugLoc; }
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 01291e43c8..333ab87d87 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -346,6 +346,70 @@ inline unsigned getInternalReadRegState(bool B) {
   return B ? RegState::InternalRead : 0;
 }
 
+
+/// Helper class for constructing bundles of MachineInstrs.
+///
+/// MIBundleBuilder can create a bundle from scratch by inserting new
+/// MachineInstrs one at a time, or it can create a bundle from a sequence of
+/// existing MachineInstrs in a basic block.
+class MIBundleBuilder {
+  MachineBasicBlock &MBB;
+  MachineBasicBlock::instr_iterator Begin;
+  MachineBasicBlock::instr_iterator End;
+
+public:
+  /// Create an MIBundleBuilder that inserts instructions into a new bundle in
+  /// BB above the bundle or instruction at Pos.
+  MIBundleBuilder(MachineBasicBlock &BB,
+                  MachineBasicBlock::iterator Pos)
+    : MBB(BB), Begin(Pos.getInstrIterator()), End(Begin) {}
+
+  /// Create a bundle from the sequence of instructions between B and E.
+  MIBundleBuilder(MachineBasicBlock &BB,
+                  MachineBasicBlock::iterator B,
+                  MachineBasicBlock::iterator E)
+    : MBB(BB), Begin(B.getInstrIterator()), End(E.getInstrIterator()) {
+    assert(B != E && "No instructions to bundle");
+    ++B;
+    while (B != E) {
+      MachineInstr *MI = B;
+      ++B;
+      MI->bundleWithPred();
+    }
+  }
+
+  /// Return true if no instructions have been inserted in this bundle yet.
+  /// Empty bundles aren't representable in a MachineBasicBlock.
+  bool empty() const { return Begin == End; }
+
+  /// Return an iterator to the first bundled instruction.
+  MachineBasicBlock::instr_iterator begin() const { return Begin; }
+
+  /// Return an iterator beyond the last bundled instruction.
+  MachineBasicBlock::instr_iterator end() const { return End; }
+
+  /// Insert MI into MBB by prepending it to the instructions in the bundle.
+  /// MI will become the first instruction in the bundle.
+  MIBundleBuilder &prepend(MachineInstr *MI) {
+    MBB.insert(Begin, MI);
+    if (!empty())
+      MI->bundleWithSucc();
+    Begin = MI;
+    return *this;
+  }
+
+  /// Insert MI into MBB by appending it to the instructions in the bundle.
+  /// MI will become the last instruction in the bundle.
+  MIBundleBuilder &append(MachineInstr *MI) {
+    MBB.insert(End, MI);
+    if (empty())
+      Begin = MI;
+    else
+      MI->bundleWithPred();
+    return *this;
+  }
+};
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h
index 928145d279..adcd1d0de6 100644
--- a/include/llvm/CodeGen/MachineJumpTableInfo.h
+++ b/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -20,8 +20,8 @@
 #ifndef LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H
 #define LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H
 
-#include <vector>
 #include <cassert>
+#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h
index d53f041128..99da6b2745 100644
--- a/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/include/llvm/CodeGen/MachineLoopInfo.h
@@ -30,8 +30,8 @@
 #ifndef LLVM_CODEGEN_MACHINE_LOOP_INFO_H
 #define LLVM_CODEGEN_MACHINE_LOOP_INFO_H
 
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/MachineLoopRanges.h b/include/llvm/CodeGen/MachineLoopRanges.h
deleted file mode 100644
index 6a30e8b53c..0000000000
--- a/include/llvm/CodeGen/MachineLoopRanges.h
+++ /dev/null
@@ -1,112 +0,0 @@
-//===- MachineLoopRanges.h - Ranges of machine loops -----------*- c++ -*--===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the interface to the MachineLoopRanges analysis.
-//
-// Provide on-demand information about the ranges of machine instructions
-// covered by a loop.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_MACHINELOOPRANGES_H
-#define LLVM_CODEGEN_MACHINELOOPRANGES_H
-
-#include "llvm/ADT/IntervalMap.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-
-namespace llvm {
-
-class MachineLoop;
-class MachineLoopInfo;
-class raw_ostream;
-
-/// MachineLoopRange - Range information for a single loop.
-class MachineLoopRange {
-  friend class MachineLoopRanges;
-
-public:
-  typedef IntervalMap<SlotIndex, unsigned, 4> Map;
-  typedef Map::Allocator Allocator;
-
-private:
-  /// The mapped loop.
-  const MachineLoop *const Loop;
-
-  /// Map intervals to a bit mask.
-  /// Bit 0 = inside loop block.
-  Map Intervals;
-
-  /// Loop area as measured by SlotIndex::distance.
-  unsigned Area;
-
-  /// Create a MachineLoopRange, only accessible to MachineLoopRanges.
-  MachineLoopRange(const MachineLoop*, Allocator&, SlotIndexes&);
-
-public:
-  /// getLoop - Return the mapped machine loop.
-  const MachineLoop *getLoop() const { return Loop; }
-
-  /// overlaps - Return true if this loop overlaps the given range of machine
-  /// inteructions.
-  bool overlaps(SlotIndex Start, SlotIndex Stop);
-
-  /// getNumber - Return the loop number. This is the same as the number of the
-  /// header block.
-  unsigned getNumber() const;
-
-  /// getArea - Return the loop area. This number is approximately proportional
-  /// to the number of instructions in the loop.
-  unsigned getArea() const { return Area; }
-
-  /// getMap - Allow public read-only access for IntervalMapOverlaps.
-  const Map &getMap() { return Intervals; }
-
-  /// print - Print loop ranges on OS.
-  void print(raw_ostream&) const;
-
-  /// byNumber - Comparator for array_pod_sort that sorts a list of
-  /// MachineLoopRange pointers by number.
-  static int byNumber(const void*, const void*);
-
-  /// byAreaDesc - Comparator for array_pod_sort that sorts a list of
-  /// MachineLoopRange pointers by descending area, then by number.
-  static int byAreaDesc(const void*, const void*);
-};
-
-raw_ostream &operator<<(raw_ostream&, const MachineLoopRange&);
-
-/// MachineLoopRanges - Analysis pass that provides on-demand per-loop range
-/// information.
-class MachineLoopRanges : public MachineFunctionPass {
-  typedef DenseMap<const MachineLoop*, MachineLoopRange*> CacheMap;
-  typedef MachineLoopRange::Allocator MapAllocator;
-
-  MapAllocator Allocator;
-  SlotIndexes *Indexes;
-  CacheMap Cache;
-
-public:
-  static char ID; // Pass identification, replacement for typeid
-
-  MachineLoopRanges() : MachineFunctionPass(ID), Indexes(0) {}
-  ~MachineLoopRanges() { releaseMemory(); }
-
-  /// getLoopRange - Return the range of loop.
-  MachineLoopRange *getLoopRange(const MachineLoop *Loop);
-
-private:
-  virtual bool runOnMachineFunction(MachineFunction&);
-  virtual void releaseMemory();
-  virtual void getAnalysisUsage(AnalysisUsage&) const;
-};
-
-
-} // end namespace llvm
-
-#endif // LLVM_CODEGEN_MACHINELOOPRANGES_H
diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h
index fc73a3d609..cf9e8b61ef 100644
--- a/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/include/llvm/CodeGen/MachineModuleInfo.h
@@ -31,19 +31,19 @@
 #ifndef LLVM_CODEGEN_MACHINEMODULEINFO_H
 #define LLVM_CODEGEN_MACHINEMODULEINFO_H
 
-#include "llvm/Pass.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Metadata.h"
-#include "llvm/MC/MachineLocation.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/DebugLoc.h"
-#include "llvm/Support/ValueHandle.h"
-#include "llvm/Support/DataTypes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ValueHandle.h"
 
 namespace llvm {
 
@@ -180,8 +180,9 @@ public:
                     const MCObjectFileInfo *MOFI);
   ~MachineModuleInfo();
 
-  bool doInitialization();
-  bool doFinalization();
+  // Initialization and Finalization
+  virtual bool doInitialization(Module &);
+  virtual bool doFinalization(Module &);
 
   /// EndFunction - Discard function meta information.
   ///
diff --git a/include/llvm/CodeGen/MachinePostDominators.h b/include/llvm/CodeGen/MachinePostDominators.h
index a9fc8434ab..00a60cb1cc 100644
--- a/include/llvm/CodeGen/MachinePostDominators.h
+++ b/include/llvm/CodeGen/MachinePostDominators.h
@@ -15,10 +15,10 @@
 #ifndef LLVM_CODEGEN_MACHINEPOSTDOMINATORS_H
 #define LLVM_CODEGEN_MACHINEPOSTDOMINATORS_H
 
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 4e86363f07..0cf584fd71 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -14,10 +14,10 @@
 #ifndef LLVM_CODEGEN_MACHINEREGISTERINFO_H
 #define LLVM_CODEGEN_MACHINEREGISTERINFO_H
 
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/IndexedMap.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <vector>
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h
index 83c379b48c..85bf511d60 100644
--- a/include/llvm/CodeGen/PBQP/Graph.h
+++ b/include/llvm/CodeGen/PBQP/Graph.h
@@ -16,10 +16,10 @@
 #define LLVM_CODEGEN_PBQP_GRAPH_H
 
 #include "Math.h"
-
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
 #include <list>
 #include <map>
-#include <llvm/ADT/ilist.h>
 
 namespace PBQP {
 
diff --git a/include/llvm/CodeGen/PBQP/HeuristicSolver.h b/include/llvm/CodeGen/PBQP/HeuristicSolver.h
index 35514f9674..47e15b27e7 100644
--- a/include/llvm/CodeGen/PBQP/HeuristicSolver.h
+++ b/include/llvm/CodeGen/PBQP/HeuristicSolver.h
@@ -18,8 +18,8 @@
 
 #include "Graph.h"
 #include "Solution.h"
-#include <vector>
 #include <limits>
+#include <vector>
 
 namespace PBQP {
 
diff --git a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
index a859e5899f..307d81e1d1 100644
--- a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
@@ -18,9 +18,8 @@
 #ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
 #define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
 
-#include "../HeuristicSolver.h"
 #include "../HeuristicBase.h"
-
+#include "../HeuristicSolver.h"
 #include <limits>
 
 namespace PBQP {
diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h
index e7598bf3e3..4e51913eba 100644
--- a/include/llvm/CodeGen/PBQP/Math.h
+++ b/include/llvm/CodeGen/PBQP/Math.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_CODEGEN_PBQP_MATH_H 
 #define LLVM_CODEGEN_PBQP_MATH_H
 
-#include <cassert>
 #include <algorithm>
+#include <cassert>
 #include <functional>
 
 namespace PBQP {
diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h
index 57d9b95fc3..b9f288bbee 100644
--- a/include/llvm/CodeGen/PBQP/Solution.h
+++ b/include/llvm/CodeGen/PBQP/Solution.h
@@ -14,9 +14,8 @@
 #ifndef LLVM_CODEGEN_PBQP_SOLUTION_H
 #define LLVM_CODEGEN_PBQP_SOLUTION_H
 
-#include "Math.h"
 #include "Graph.h"
-
+#include "Math.h"
 #include <map>
 
 namespace PBQP {
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 7bd576494e..282e4edcfb 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -141,6 +141,10 @@ public:
   /// Add passes to lower exception handling for the code generator.
   void addPassesToHandleExceptions();
 
+  /// Add pass to prepare the LLVM IR for code generation. This should be done
+  /// before exception handling preparation passes.
+  virtual void addCodeGenPrepare();
+
   /// Add common passes that perform LLVM IR to IR transforms in preparation for
   /// instruction selection.
   virtual void addISelPrepare();
@@ -288,9 +292,6 @@ namespace llvm {
   /// MachineLoopInfo - This pass is a loop analysis pass.
   extern char &MachineLoopInfoID;
 
-  /// MachineLoopRanges - This pass is an on-demand loop coverage analysis.
-  extern char &MachineLoopRangesID;
-
   /// MachineDominators - This pass is a machine dominators analysis pass.
   extern char &MachineDominatorsID;
 
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h
index acfc07dd31..b617c14558 100644
--- a/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/include/llvm/CodeGen/RegAllocPBQP.h
@@ -20,7 +20,6 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/PBQP/Graph.h"
 #include "llvm/CodeGen/PBQP/Solution.h"
-
 #include <map>
 #include <set>
 
@@ -29,6 +28,7 @@ namespace llvm {
   class LiveIntervals;
   class MachineFunction;
   class MachineLoopInfo;
+  class TargetRegisterInfo;
 
   /// This class wraps up a PBQP instance representing a register allocation
   /// problem, plus the structures necessary to map back from the PBQP solution
diff --git a/include/llvm/CodeGen/RegisterClassInfo.h b/include/llvm/CodeGen/RegisterClassInfo.h
index 4467b62f23..12bd1c61d2 100644
--- a/include/llvm/CodeGen/RegisterClassInfo.h
+++ b/include/llvm/CodeGen/RegisterClassInfo.h
@@ -29,10 +29,10 @@ class RegisterClassInfo {
     unsigned Tag;
     unsigned NumRegs;
     bool ProperSubClass;
-    OwningArrayPtr<unsigned> Order;
+    OwningArrayPtr<MCPhysReg> Order;
 
     RCInfo() : Tag(0), NumRegs(0), ProperSubClass(false) {}
-    operator ArrayRef<unsigned>() const {
+    operator ArrayRef<MCPhysReg>() const {
       return makeArrayRef(Order.get(), NumRegs);
     }
   };
@@ -84,7 +84,7 @@ public:
   /// getOrder - Returns the preferred allocation order for RC. The order
   /// contains no reserved registers, and registers that alias callee saved
   /// registers come last.
-  ArrayRef<unsigned> getOrder(const TargetRegisterClass *RC) const {
+  ArrayRef<MCPhysReg> getOrder(const TargetRegisterClass *RC) const {
     return get(RC);
   }
 
diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h
index 30326d05df..2670180746 100644
--- a/include/llvm/CodeGen/RegisterPressure.h
+++ b/include/llvm/CodeGen/RegisterPressure.h
@@ -15,13 +15,14 @@
 #ifndef LLVM_CODEGEN_REGISTERPRESSURE_H
 #define LLVM_CODEGEN_REGISTERPRESSURE_H
 
+#include "llvm/ADT/SparseSet.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SparseSet.h"
 
 namespace llvm {
 
 class LiveIntervals;
+class LiveInterval;
 class RegisterClassInfo;
 class MachineInstr;
 
@@ -30,18 +31,24 @@ struct RegisterPressure {
   /// Map of max reg pressure indexed by pressure set ID, not class ID.
   std::vector<unsigned> MaxSetPressure;
 
-  /// List of live in registers.
+  /// List of live in virtual registers or physical register units.
   SmallVector<unsigned,8> LiveInRegs;
   SmallVector<unsigned,8> LiveOutRegs;
 
   /// Increase register pressure for each pressure set impacted by this register
   /// class. Normally called by RegPressureTracker, but may be called manually
   /// to account for live through (global liveness).
-  void increase(const TargetRegisterClass *RC, const TargetRegisterInfo *TRI);
+  ///
+  /// \param Reg is either a virtual register number or register unit number.
+  void increase(unsigned Reg, const TargetRegisterInfo *TRI,
+                const MachineRegisterInfo *MRI);
 
   /// Decrease register pressure for each pressure set impacted by this register
   /// class. This is only useful to account for spilling or rematerialization.
-  void decrease(const TargetRegisterClass *RC, const TargetRegisterInfo *TRI);
+  ///
+  /// \param Reg is either a virtual register number or register unit number.
+  void decrease(unsigned Reg, const TargetRegisterInfo *TRI,
+                const MachineRegisterInfo *MRI);
 
   void dump(const TargetRegisterInfo *TRI) const;
 };
@@ -116,6 +123,33 @@ struct RegPressureDelta {
   RegPressureDelta() {}
 };
 
+/// \brief A set of live virtual registers and physical register units.
+///
+/// Virtual and physical register numbers require separate sparse sets, but most
+/// of the RegisterPressureTracker handles them uniformly.
+struct LiveRegSet {
+  SparseSet<unsigned> PhysRegs;
+  SparseSet<unsigned, VirtReg2IndexFunctor> VirtRegs;
+
+  bool contains(unsigned Reg) {
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      return VirtRegs.count(Reg);
+    return PhysRegs.count(Reg);
+  }
+
+  bool insert(unsigned Reg) {
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      return VirtRegs.insert(Reg).second;
+    return PhysRegs.insert(Reg).second;
+  }
+
+  bool erase(unsigned Reg) {
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      return VirtRegs.erase(Reg);
+    return PhysRegs.erase(Reg);
+  }
+};
+
 /// Track the current register pressure at some position in the instruction
 /// stream, and remember the high water mark within the region traversed. This
 /// does not automatically consider live-through ranges. The client may
@@ -150,15 +184,15 @@ class RegPressureTracker {
   bool RequireIntervals;
 
   /// Register pressure corresponds to liveness before this instruction
-  /// iterator. It may point to the end of the block rather than an instruction.
+  /// iterator. It may point to the end of the block or a DebugValue rather than
+  /// an instruction.
   MachineBasicBlock::const_iterator CurrPos;
 
   /// Pressure map indexed by pressure set ID, not class ID.
   std::vector<unsigned> CurrSetPressure;
 
-  /// List of live registers.
-  SparseSet<unsigned> LivePhysRegs;
-  SparseSet<unsigned, VirtReg2IndexFunctor> LiveVirtRegs;
+  /// Set of live registers.
+  LiveRegSet LiveRegs;
 
 public:
   RegPressureTracker(IntervalPressure &rp) :
@@ -171,8 +205,9 @@ public:
             const LiveIntervals *lis, const MachineBasicBlock *mbb,
             MachineBasicBlock::const_iterator pos);
 
-  /// Force liveness of registers. Particularly useful to initialize the
-  /// livein/out state of the tracker before the first call to advance/recede.
+  /// Force liveness of virtual registers or physical register
+  /// units. Particularly useful to initialize the livein/out state of the
+  /// tracker before the first call to advance/recede.
   void addLiveRegs(ArrayRef<unsigned> Regs);
 
   /// Get the MI position corresponding to this register pressure.
@@ -184,6 +219,10 @@ public:
   // position changes while pressure does not.
   void setPos(MachineBasicBlock::const_iterator Pos) { CurrPos = Pos; }
 
+  /// \brief Get the SlotIndex for the first nondebug instruction including or
+  /// after the current position.
+  SlotIndex getCurrSlot() const;
+
   /// Recede across the previous instruction.
   bool recede();
 
@@ -203,11 +242,8 @@ public:
   /// than the pressure across the traversed region.
   std::vector<unsigned> &getRegSetPressureAtPos() { return CurrSetPressure; }
 
-  void discoverPhysLiveIn(unsigned Reg);
-  void discoverPhysLiveOut(unsigned Reg);
-
-  void discoverVirtLiveIn(unsigned Reg);
-  void discoverVirtLiveOut(unsigned Reg);
+  void discoverLiveOut(unsigned Reg);
+  void discoverLiveIn(unsigned Reg);
 
   bool isTopClosed() const;
   bool isBottomClosed() const;
@@ -268,12 +304,13 @@ public:
     return getDownwardPressure(MI, PressureResult, MaxPressureResult);
   }
 
+  void dump() const;
+
 protected:
-  void increasePhysRegPressure(ArrayRef<unsigned> Regs);
-  void decreasePhysRegPressure(ArrayRef<unsigned> Regs);
+  const LiveInterval *getInterval(unsigned Reg) const;
 
-  void increaseVirtRegPressure(ArrayRef<unsigned> Regs);
-  void decreaseVirtRegPressure(ArrayRef<unsigned> Regs);
+  void increaseRegPressure(ArrayRef<unsigned> Regs);
+  void decreaseRegPressure(ArrayRef<unsigned> Regs);
 
   void bumpUpwardPressure(const MachineInstr *MI);
   void bumpDownwardPressure(const MachineInstr *MI);
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 8752e67a79..6604d4cdda 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -17,9 +17,9 @@
 #ifndef LLVM_CODEGEN_REGISTER_SCAVENGING_H
 #define LLVM_CODEGEN_REGISTER_SCAVENGING_H
 
+#include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/ResourcePriorityQueue.h b/include/llvm/CodeGen/ResourcePriorityQueue.h
index 56b5855c01..66a6039668 100644
--- a/include/llvm/CodeGen/ResourcePriorityQueue.h
+++ b/include/llvm/CodeGen/ResourcePriorityQueue.h
@@ -18,8 +18,8 @@
 #define RESOURCE_PRIORITY_QUEUE_H
 
 #include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 016722e7f4..aa91b0300a 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -16,13 +16,13 @@
 #ifndef LLVM_CODEGEN_SCHEDULEDAG_H
 #define LLVM_CODEGEN_SCHEDULEDAG_H
 
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
   class AliasAnalysis;
diff --git a/include/llvm/CodeGen/ScheduleDAGILP.h b/include/llvm/CodeGen/ScheduleDAGILP.h
deleted file mode 100644
index 1aa4058421..0000000000
--- a/include/llvm/CodeGen/ScheduleDAGILP.h
+++ /dev/null
@@ -1,86 +0,0 @@
-//===- ScheduleDAGILP.h - ILP metric for ScheduleDAGInstrs ------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Definition of an ILP metric for machine level instruction scheduling.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_SCHEDULEDAGILP_H
-#define LLVM_CODEGEN_SCHEDULEDAGILP_H
-
-#include "llvm/Support/DataTypes.h"
-#include <vector>
-
-namespace llvm {
-
-class raw_ostream;
-class ScheduleDAGInstrs;
-class SUnit;
-
-/// \brief Represent the ILP of the subDAG rooted at a DAG node.
-struct ILPValue {
-  unsigned InstrCount;
-  unsigned Cycles;
-
-  ILPValue(): InstrCount(0), Cycles(0) {}
-
-  ILPValue(unsigned count, unsigned cycles):
-    InstrCount(count), Cycles(cycles) {}
-
-  bool isValid() const { return Cycles > 0; }
-
-  // Order by the ILP metric's value.
-  bool operator<(ILPValue RHS) const {
-    return (uint64_t)InstrCount * RHS.Cycles
-      < (uint64_t)Cycles * RHS.InstrCount;
-  }
-  bool operator>(ILPValue RHS) const {
-    return RHS < *this;
-  }
-  bool operator<=(ILPValue RHS) const {
-    return (uint64_t)InstrCount * RHS.Cycles
-      <= (uint64_t)Cycles * RHS.InstrCount;
-  }
-  bool operator>=(ILPValue RHS) const {
-    return RHS <= *this;
-  }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-  void print(raw_ostream &OS) const;
-
-  void dump() const;
-#endif
-};
-
-/// \brief Compute the values of each DAG node for an ILP metric.
-///
-/// This metric assumes that the DAG is a forest of trees with roots at the
-/// bottom of the schedule.
-class ScheduleDAGILP {
-  bool IsBottomUp;
-  std::vector<ILPValue> ILPValues;
-
-public:
-  ScheduleDAGILP(bool IsBU): IsBottomUp(IsBU) {}
-
-  /// \brief Initialize the result data with the size of the DAG.
-  void resize(unsigned NumSUnits);
-
-  /// \brief Compute the ILP metric for the subDAG at this root.
-  void computeILP(const SUnit *Root);
-
-  /// \brief Get the ILP value for a DAG node.
-  ILPValue getILP(const SUnit *SU);
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val);
-
-} // namespace llvm
-
-#endif
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 4bcd35a834..ffc442e3b8 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -15,14 +15,14 @@
 #ifndef SCHEDULEDAGINSTRS_H
 #define SCHEDULEDAGINSTRS_H
 
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SparseSet.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SparseSet.h"
 #include <map>
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/ScheduleDFS.h b/include/llvm/CodeGen/ScheduleDFS.h
new file mode 100644
index 0000000000..fbbadd95ad
--- /dev/null
+++ b/include/llvm/CodeGen/ScheduleDFS.h
@@ -0,0 +1,152 @@
+//===- ScheduleDAGILP.h - ILP metric for ScheduleDAGInstrs ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of an ILP metric for machine level instruction scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SCHEDULEDAGILP_H
+#define LLVM_CODEGEN_SCHEDULEDAGILP_H
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/DataTypes.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+class IntEqClasses;
+class ScheduleDAGInstrs;
+class SUnit;
+
+/// \brief Represent the ILP of the subDAG rooted at a DAG node.
+///
+/// When computed using bottom-up DFS, this metric assumes that the DAG is a
+/// forest of trees with roots at the bottom of the schedule branching upward.
+struct ILPValue {
+  unsigned InstrCount;
+  /// Length may either correspond to depth or height, depending on direction,
+  /// and cycles or nodes depending on context.
+  unsigned Length;
+
+  ILPValue(unsigned count, unsigned length):
+    InstrCount(count), Length(length) {}
+
+  // Order by the ILP metric's value.
+  bool operator<(ILPValue RHS) const {
+    return (uint64_t)InstrCount * RHS.Length
+      < (uint64_t)Length * RHS.InstrCount;
+  }
+  bool operator>(ILPValue RHS) const {
+    return RHS < *this;
+  }
+  bool operator<=(ILPValue RHS) const {
+    return (uint64_t)InstrCount * RHS.Length
+      <= (uint64_t)Length * RHS.InstrCount;
+  }
+  bool operator>=(ILPValue RHS) const {
+    return RHS <= *this;
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  void print(raw_ostream &OS) const;
+
+  void dump() const;
+#endif
+};
+
+/// \brief Compute the values of each DAG node for various metrics during DFS.
+///
+/// ILPValues summarize the DAG subtree rooted at each node up to
+/// SubtreeLimit. ILPValues are also valid for interior nodes of a subtree, not
+/// just the root.
+class SchedDFSResult {
+  friend class SchedDFSImpl;
+
+  /// \brief Per-SUnit data computed during DFS for various metrics.
+  struct NodeData {
+    unsigned InstrCount;
+    unsigned SubtreeID;
+
+    NodeData(): InstrCount(0), SubtreeID(0) {}
+  };
+
+  /// \brief Record a connection between subtrees and the connection level.
+  struct Connection {
+    unsigned TreeID;
+    unsigned Level;
+
+    Connection(unsigned tree, unsigned level): TreeID(tree), Level(level) {}
+  };
+
+  bool IsBottomUp;
+  unsigned SubtreeLimit;
+  /// DFS results for each SUnit in this DAG.
+  std::vector<NodeData> DFSData;
+
+  // For each subtree discovered during DFS, record its connections to other
+  // subtrees.
+  std::vector<SmallVector<Connection, 4> > SubtreeConnections;
+
+  /// Cache the current connection level of each subtree.
+  /// This mutable array is updated during scheduling.
+  std::vector<unsigned> SubtreeConnectLevels;
+
+public:
+  SchedDFSResult(bool IsBU, unsigned lim)
+    : IsBottomUp(IsBU), SubtreeLimit(lim) {}
+
+  /// \brief Clear the results.
+  void clear() {
+    DFSData.clear();
+    SubtreeConnections.clear();
+    SubtreeConnectLevels.clear();
+  }
+
+  /// \brief Initialize the result data with the size of the DAG.
+  void resize(unsigned NumSUnits) {
+    DFSData.resize(NumSUnits);
+  }
+
+  /// \brief Compute various metrics for the DAG with given roots.
+  void compute(ArrayRef<SUnit *> Roots);
+
+  /// \brief Get the ILP value for a DAG node.
+  ///
+  /// A leaf node has an ILP of 1/1.
+  ILPValue getILP(const SUnit *SU) {
+    return ILPValue(DFSData[SU->NodeNum].InstrCount, 1 + SU->getDepth());
+  }
+
+  /// \brief The number of subtrees detected in this DAG.
+  unsigned getNumSubtrees() const { return SubtreeConnectLevels.size(); }
+
+  /// \brief Get the ID of the subtree the given DAG node belongs to.
+  unsigned getSubtreeID(const SUnit *SU) {
+    return DFSData[SU->NodeNum].SubtreeID;
+  }
+
+  /// \brief Get the connection level of a subtree.
+  ///
+  /// For bottom-up trees, the connection level is the latency depth (in cycles)
+  /// of the deepest connection to another subtree.
+  unsigned getSubtreeLevel(unsigned SubtreeID) {
+    return SubtreeConnectLevels[SubtreeID];
+  }
+
+  /// \brief Scheduler callback to update SubtreeConnectLevels when a tree is
+  /// initially scheduled.
+  void scheduleTree(unsigned SubtreeID);
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val);
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
index 060e89a3fd..c2103fb233 100644
--- a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
+++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
@@ -18,7 +18,6 @@
 
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/Support/DataTypes.h"
-
 #include <cassert>
 #include <cstring>
 
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 619ee69943..b150c29d0f 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -15,16 +15,16 @@
 #ifndef LLVM_CODEGEN_SELECTIONDAG_H
 #define LLVM_CODEGEN_SELECTIONDAG_H
 
-#include "llvm/ADT/ilist.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ilist.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/Support/RecyclingAllocator.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cassert>
-#include <vector>
 #include <map>
 #include <string>
+#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index c42f655800..95d4c37a56 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -16,9 +16,9 @@
 #define LLVM_CODEGEN_SELECTIONDAG_ISEL_H
 
 #include "llvm/BasicBlock.h"
-#include "llvm/Pass.h"
-#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Pass.h"
 
 namespace llvm {
   class FastISel;
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 362e9afd22..252d9ca173 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -19,20 +19,20 @@
 #ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
 #define LLVM_CODEGEN_SELECTIONDAGNODES_H
 
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ilist_node.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/MathExtras.h"
 #include <cassert>
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index c52599b0f6..4d66793e80 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -19,13 +19,13 @@
 #ifndef LLVM_CODEGEN_SLOTINDEXES_H
 #define LLVM_CODEGEN_SLOTINDEXES_H
 
-#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PointerIntPair.h"
-#include "llvm/ADT/ilist.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/Support/Allocator.h"
 
 namespace llvm {
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index f1b3065e7c..e7098e48bf 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -15,9 +15,9 @@
 #ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
 #define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/SectionKind.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/ADT/StringRef.h"
 
 namespace llvm {
   class MachineModuleInfo;
diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h
index 88e6105a7d..4c4a2a8b95 100644
--- a/include/llvm/CodeGen/TargetSchedule.h
+++ b/include/llvm/CodeGen/TargetSchedule.h
@@ -16,10 +16,10 @@
 #ifndef LLVM_TARGET_TARGETSCHEDMODEL_H
 #define LLVM_TARGET_TARGETSCHEDMODEL_H
 
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCSchedule.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/MCSchedule.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 
 namespace llvm {
 
diff --git a/lib/CodeGen/VirtRegMap.h b/include/llvm/CodeGen/VirtRegMap.h
index 7974dda66a..3bc6ebd563 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/include/llvm/CodeGen/VirtRegMap.h
@@ -17,9 +17,9 @@
 #ifndef LLVM_CODEGEN_VIRTREGMAP_H
 #define LLVM_CODEGEN_VIRTREGMAP_H
 
+#include "llvm/ADT/IndexedMap.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/IndexedMap.h"
 
 namespace llvm {
   class MachineInstr;
@@ -126,13 +126,13 @@ namespace llvm {
       grow();
     }
 
-    /// @brief returns the register allocation preference.
-    unsigned getRegAllocPref(unsigned virtReg);
-
     /// @brief returns true if VirtReg is assigned to its preferred physreg.
-    bool hasPreferredPhys(unsigned VirtReg) {
-      return getPhys(VirtReg) == getRegAllocPref(VirtReg);
-    }
+    bool hasPreferredPhys(unsigned VirtReg);
+
+    /// @brief returns true if VirtReg has a known preferred register.
+    /// This returns false if VirtReg has a preference that is a virtual
+    /// register that hasn't been assigned yet.
+    bool hasKnownPreference(unsigned VirtReg);
 
     /// @brief records virtReg is a split live interval from SReg.
     void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index 456c814596..23e548ae30 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -21,11 +21,11 @@
 #ifndef LLVM_CONSTANTS_H
 #define LLVM_CONSTANTS_H
 
-#include "llvm/Constant.h"
-#include "llvm/OperandTraits.h"
-#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Constant.h"
+#include "llvm/OperandTraits.h"
 
 namespace llvm {
 
@@ -1082,8 +1082,7 @@ public:
   /// A better approach to this could be to have a constructor for Instruction
   /// which would take a ConstantExpr parameter, but that would have spread 
   /// implementation details of ConstantExpr outside of Constants.cpp, which 
-  /// would make it harder to remove ConstantExprs altogether
-  /// (http://llvm.org/bugs/show_bug.cgi?id=10368).
+  /// would make it harder to remove ConstantExprs altogether.
   Instruction *getAsInstruction();
 
   virtual void destroyConstant();
diff --git a/include/llvm/DIBuilder.h b/include/llvm/DIBuilder.h
index 2f07800680..f6bc7b12ec 100644
--- a/include/llvm/DIBuilder.h
+++ b/include/llvm/DIBuilder.h
@@ -15,9 +15,9 @@
 #ifndef LLVM_ANALYSIS_DIBUILDER_H
 #define LLVM_ANALYSIS_DIBUILDER_H
 
-#include "llvm/Support/DataTypes.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
   class BasicBlock;
@@ -371,7 +371,7 @@ namespace llvm {
 
     /// getOrCreateSubrange - Create a descriptor for a value range.  This
     /// implicitly uniques the values returned.
-    DISubrange getOrCreateSubrange(int64_t Lo, int64_t Hi);
+    DISubrange getOrCreateSubrange(int64_t Lo, int64_t Count);
 
     /// createGlobalVariable - Create a new descriptor for the specified global.
     /// @param Name        Name of the variable.
diff --git a/include/llvm/DataLayout.h b/include/llvm/DataLayout.h
index e10f9c74c9..4cb7766387 100644
--- a/include/llvm/DataLayout.h
+++ b/include/llvm/DataLayout.h
@@ -20,9 +20,9 @@
 #ifndef LLVM_DATALAYOUT_H
 #define LLVM_DATALAYOUT_H
 
-#include "llvm/Pass.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/DataTypes.h"
 
 namespace llvm {
@@ -148,9 +148,9 @@ private:
     return &align != &InvalidPointerElem;
   }
 
-  /// Parses a target data specification string. Returns an error message
-  /// if the string is malformed, or the empty string on success.
-  std::string parseSpecifier(StringRef LayoutDescription);
+  /// Parses a target data specification string. Assert if the string is
+  /// malformed.
+  void parseSpecifier(StringRef LayoutDescription);
 
 public:
   /// Default ctor.
diff --git a/include/llvm/DebugInfo.h b/include/llvm/DebugInfo.h
index 8520ebaba5..43af6ed080 100644
--- a/include/llvm/DebugInfo.h
+++ b/include/llvm/DebugInfo.h
@@ -17,8 +17,8 @@
 #ifndef LLVM_ANALYSIS_DEBUGINFO_H
 #define LLVM_ANALYSIS_DEBUGINFO_H
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Dwarf.h"
 
@@ -71,6 +71,7 @@ namespace llvm {
       return (unsigned)getUInt64Field(Elt);
     }
     uint64_t getUInt64Field(unsigned Elt) const;
+    int64_t getInt64Field(unsigned Elt) const;
     DIDescriptor getDescriptorField(unsigned Elt) const;
 
     template <typename DescTy>
@@ -141,8 +142,8 @@ namespace llvm {
   public:
     explicit DISubrange(const MDNode *N = 0) : DIDescriptor(N) {}
 
-    uint64_t getLo() const { return getUInt64Field(1); }
-    uint64_t getHi() const { return getUInt64Field(2); }
+    int64_t getLo() const { return getInt64Field(1); }
+    int64_t  getCount() const { return getInt64Field(2); }
   };
 
   /// DIArray - This descriptor holds an array of descriptors.
diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h
index 9bfb19d693..5ebf4b0939 100644
--- a/include/llvm/DebugInfo/DIContext.h
+++ b/include/llvm/DebugInfo/DIContext.h
@@ -16,8 +16,8 @@
 #define LLVM_DEBUGINFO_DICONTEXT_H
 
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Object/RelocVisitor.h"
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index c862c2c8bb..6068b8c337 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -18,9 +18,9 @@
 #ifndef LLVM_DERIVED_TYPES_H
 #define LLVM_DERIVED_TYPES_H
 
-#include "llvm/Type.h"
-#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Type.h"
 
 namespace llvm {
 
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index 8073d8f92c..2d60c362cc 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -15,19 +15,19 @@
 #ifndef LLVM_EXECUTION_ENGINE_H
 #define LLVM_EXECUTION_ENGINE_H
 
-#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/ValueMap.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/Mutex.h"
+#include "llvm/Support/ValueHandle.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include <vector>
 #include <map>
 #include <string>
+#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
index e6586e778c..7bd1fad934 100644
--- a/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -18,7 +18,6 @@
 #include "llvm/Config/config.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/DebugLoc.h"
-
 #include <vector>
 
 namespace llvm {
diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h
index 9089646501..29e01aa7ae 100644
--- a/include/llvm/ExecutionEngine/JITMemoryManager.h
+++ b/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -12,7 +12,6 @@
 
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "llvm/Support/DataTypes.h"
-
 #include <string>
 
 namespace llvm {
diff --git a/include/llvm/ExecutionEngine/ObjectBuffer.h b/include/llvm/ExecutionEngine/ObjectBuffer.h
index 3045fbd60d..96a48b28b8 100644
--- a/include/llvm/ExecutionEngine/ObjectBuffer.h
+++ b/include/llvm/ExecutionEngine/ObjectBuffer.h
@@ -15,10 +15,10 @@
 #ifndef LLVM_EXECUTIONENGINE_OBJECTBUFFER_H
 #define LLVM_EXECUTIONENGINE_OBJECTBUFFER_H
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 
diff --git a/include/llvm/ExecutionEngine/ObjectImage.h b/include/llvm/ExecutionEngine/ObjectImage.h
index a92d1d55df..d09e4deb1a 100644
--- a/include/llvm/ExecutionEngine/ObjectImage.h
+++ b/include/llvm/ExecutionEngine/ObjectImage.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_EXECUTIONENGINE_OBJECTIMAGE_H
 #define LLVM_EXECUTIONENGINE_OBJECTIMAGE_H
 
-#include "llvm/Object/ObjectFile.h"
 #include "llvm/ExecutionEngine/ObjectBuffer.h"
+#include "llvm/Object/ObjectFile.h"
 
 namespace llvm {
 
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index c3d160f012..e2905e37cf 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -36,23 +36,21 @@ public:
   RTDyldMemoryManager() {}
   virtual ~RTDyldMemoryManager();
 
-  /// allocateCodeSection - Allocate a memory block of (at least) the given
-  /// size suitable for executable code. The SectionID is a unique identifier
-  /// assigned by the JIT engine, and optionally recorded by the memory manager
-  /// to access a loaded section.
+  /// Allocate a memory block of (at least) the given size suitable for
+  /// executable code. The SectionID is a unique identifier assigned by the JIT
+  /// engine, and optionally recorded by the memory manager to access a loaded
+  /// section.
   virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
                                        unsigned SectionID) = 0;
 
-  /// allocateDataSection - Allocate a memory block of (at least) the given
-  /// size suitable for data. The SectionID is a unique identifier
-  /// assigned by the JIT engine, and optionally recorded by the memory manager
-  /// to access a loaded section.
+  /// Allocate a memory block of (at least) the given size suitable for data.
+  /// The SectionID is a unique identifier assigned by the JIT engine, and
+  /// optionally recorded by the memory manager to access a loaded section.
   virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
                                        unsigned SectionID, bool IsReadOnly) = 0;
 
-  /// getPointerToNamedFunction - This method returns the address of the
-  /// specified function. As such it is only useful for resolving library
-  /// symbols, not code generated symbols.
+  /// This method returns the address of the specified function. As such it is
+  /// only useful for resolving library symbols, not code generated symbols.
   ///
   /// If AbortOnFailure is false and no function with the given name is
   /// found, this function returns a null pointer. Otherwise, it prints a
@@ -60,13 +58,13 @@ public:
   virtual void *getPointerToNamedFunction(const std::string &Name,
                                           bool AbortOnFailure = true) = 0;
 
-  /// applyPermissions - This method is called when object loading is
-  /// complete and section page permissions can be applied.  It is up to
-  /// the memory manager implementation to decide whether or not to act
-  /// on this method.  The memory manager will typically allocate all 
-  /// sections as read-write and then apply specific permissions when
-  /// this method is called.  Returns true if an error occurred, false
-  /// otherwise.
+  /// This method is called when object loading is complete and section page
+  /// permissions can be applied.  It is up to the memory manager implementation
+  /// to decide whether or not to act on this method.  The memory manager will
+  /// typically allocate all sections as read-write and then apply specific
+  /// permissions when this method is called.
+  ///
+  /// Returns true if an error occurred, false otherwise.
   virtual bool applyPermissions(std::string *ErrMsg = 0) = 0;
 };
 
@@ -86,10 +84,10 @@ public:
   RuntimeDyld(RTDyldMemoryManager *);
   ~RuntimeDyld();
 
-  /// loadObject - prepare the object contained in the input buffer for
-  /// execution.  Ownership of the input buffer is transferred to the
-  /// ObjectImage instance returned from this function if successful.
-  /// In the case of load failure, the input buffer will be deleted.
+  /// Prepare the object contained in the input buffer for execution.
+  /// Ownership of the input buffer is transferred to the ObjectImage
+  /// instance returned from this function if successful. In the case of load
+  /// failure, the input buffer will be deleted.
   ObjectImage *loadObject(ObjectBuffer *InputBuffer);
 
   /// Get the address of our local copy of the symbol. This may or may not
@@ -104,7 +102,7 @@ public:
   /// Resolve the relocations for all symbols we currently know about.
   void resolveRelocations();
 
-  /// mapSectionAddress - map a section to its target address space value.
+  /// Map a section to its target address space value.
   /// Map the address of a JIT section as returned from the memory manager
   /// to the address in the target process as the running code will see it.
   /// This is the address which will be used for relocation resolution.
diff --git a/include/llvm/ExecutionEngine/SectionMemoryManager.h b/include/llvm/ExecutionEngine/SectionMemoryManager.h
new file mode 100644
index 0000000000..ba4ba8d07c
--- /dev/null
+++ b/include/llvm/ExecutionEngine/SectionMemoryManager.h
@@ -0,0 +1,176 @@
+//===- SectionMemoryManager.h - Memory manager for MCJIT/RtDyld -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of a section-based memory manager used by
+// the MCJIT execution engine and RuntimeDyld.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
+#define LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
+
+namespace llvm {
+
+/// This is a simple memory manager which implements the methods called by
+/// the RuntimeDyld class to allocate memory for section-based loading of
+/// objects, usually those generated by the MCJIT execution engine.
+///
+/// This memory manager allocates all section memory as read-write.  The
+/// RuntimeDyld will copy JITed section memory into these allocated blocks
+/// and perform any necessary linking and relocations.
+///
+/// Any client using this memory manager MUST ensure that section-specific
+/// page permissions have been applied before attempting to execute functions
+/// in the JITed object.  Permissions can be applied either by calling
+/// MCJIT::finalizeObject or by calling SectionMemoryManager::applyPermissions
+/// directly.  Clients of MCJIT should call MCJIT::finalizeObject.
+class SectionMemoryManager : public JITMemoryManager {
+  SectionMemoryManager(const SectionMemoryManager&) LLVM_DELETED_FUNCTION;
+  void operator=(const SectionMemoryManager&) LLVM_DELETED_FUNCTION;
+
+public:
+  SectionMemoryManager() { }
+  virtual ~SectionMemoryManager();
+
+  /// \brief Allocates a memory block of (at least) the given size suitable for
+  /// executable code.
+  ///
+  /// The value of \p Alignment must be a power of two.  If \p Alignment is zero
+  /// a default alignment of 16 will be used.
+  virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+                                       unsigned SectionID);
+
+  /// \brief Allocates a memory block of (at least) the given size suitable for
+  /// executable code.
+  ///
+  /// The value of \p Alignment must be a power of two.  If \p Alignment is zero
+  /// a default alignment of 16 will be used.
+  virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+                                       unsigned SectionID,
+                                       bool isReadOnly);
+
+  /// \brief Applies section-specific memory permissions.
+  ///
+  /// This method is called when object loading is complete and section page
+  /// permissions can be applied.  It is up to the memory manager implementation
+  /// to decide whether or not to act on this method.  The memory manager will
+  /// typically allocate all sections as read-write and then apply specific
+  /// permissions when this method is called.  Code sections cannot be executed
+  /// until this function has been called.
+  ///
+  /// \returns true if an error occurred, false otherwise.
+  virtual bool applyPermissions(std::string *ErrMsg = 0);
+
+  /// This method returns the address of the specified function. As such it is
+  /// only useful for resolving library symbols, not code generated symbols.
+  ///
+  /// If \p AbortOnFailure is false and no function with the given name is
+  /// found, this function returns a null pointer. Otherwise, it prints a
+  /// message to stderr and aborts.
+  virtual void *getPointerToNamedFunction(const std::string &Name,
+                                          bool AbortOnFailure = true);
+
+  /// \brief Invalidate instruction cache for code sections.
+  ///
+  /// Some platforms with separate data cache and instruction cache require
+  /// explicit cache flush, otherwise JIT code manipulations (like resolved
+  /// relocations) will get to the data cache but not to the instruction cache.
+  ///
+  /// This method is not called by RuntimeDyld or MCJIT during the load
+  /// process.  Clients may call this function when needed.  See the lli
+  /// tool for example use.
+  virtual void invalidateInstructionCache();
+
+private:
+  struct MemoryGroup {
+      SmallVector<sys::MemoryBlock, 16> AllocatedMem;
+      SmallVector<sys::MemoryBlock, 16> FreeMem;
+      sys::MemoryBlock Near;
+  };
+
+  uint8_t *allocateSection(MemoryGroup &MemGroup, uintptr_t Size,
+                           unsigned Alignment);
+
+  error_code applyMemoryGroupPermissions(MemoryGroup &MemGroup,
+                                         unsigned Permissions);
+
+  MemoryGroup CodeMem;
+  MemoryGroup RWDataMem;
+  MemoryGroup RODataMem;
+
+public:
+  ///
+  /// Functions below are not used by MCJIT or RuntimeDyld, but must be
+  /// implemented because they are declared as pure virtuals in the base class.
+  ///
+
+  virtual void setMemoryWritable() {
+    llvm_unreachable("Unexpected call!");
+  }
+  virtual void setMemoryExecutable() {
+    llvm_unreachable("Unexpected call!");
+  }
+  virtual void setPoisonMemory(bool poison) {
+    llvm_unreachable("Unexpected call!");
+  }
+  virtual void AllocateGOT() {
+    llvm_unreachable("Unexpected call!");
+  }
+  virtual uint8_t *getGOTBase() const {
+    llvm_unreachable("Unexpected call!");
+    return 0;
+  }
+  virtual uint8_t *startFunctionBody(const Function *F,
+                                     uintptr_t &ActualSize){
+    llvm_unreachable("Unexpected call!");
+    return 0;
+  }
+  virtual uint8_t *allocateStub(const GlobalValue *F, unsigned StubSize,
+                                unsigned Alignment) {
+    llvm_unreachable("Unexpected call!");
+    return 0;
+  }
+  virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+                               uint8_t *FunctionEnd) {
+    llvm_unreachable("Unexpected call!");
+  }
+  virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
+    llvm_unreachable("Unexpected call!");
+    return 0;
+  }
+  virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
+    llvm_unreachable("Unexpected call!");
+    return 0;
+  }
+  virtual void deallocateFunctionBody(void *Body) {
+    llvm_unreachable("Unexpected call!");
+  }
+  virtual uint8_t *startExceptionTable(const Function *F,
+                                       uintptr_t &ActualSize) {
+    llvm_unreachable("Unexpected call!");
+    return 0;
+  }
+  virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
+                                 uint8_t *TableEnd, uint8_t *FrameRegister) {
+    llvm_unreachable("Unexpected call!");
+  }
+  virtual void deallocateExceptionTable(void *ET) {
+    llvm_unreachable("Unexpected call!");
+  }
+};
+
+}
+
+#endif // LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
+
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
index e211e9ab52..b49b8c1457 100644
--- a/include/llvm/Function.h
+++ b/include/llvm/Function.h
@@ -18,11 +18,11 @@
 #ifndef LLVM_FUNCTION_H
 #define LLVM_FUNCTION_H
 
-#include "llvm/GlobalValue.h"
-#include "llvm/CallingConv.h"
-#include "llvm/BasicBlock.h"
 #include "llvm/Argument.h"
 #include "llvm/Attributes.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/CallingConv.h"
+#include "llvm/GlobalValue.h"
 #include "llvm/Support/Compiler.h"
 
 namespace llvm {
@@ -85,7 +85,7 @@ private:
   BasicBlockListType  BasicBlocks;        ///< The basic blocks
   mutable ArgumentListType ArgumentList;  ///< The formal arguments
   ValueSymbolTable *SymTab;               ///< Symbol table of args/instructions
-  AttrListPtr AttributeList;              ///< Parameter attributes
+  AttributeSet AttributeList;              ///< Parameter attributes
 
   // HasLazyArguments is stored in Value::SubclassData.
   /*bool HasLazyArguments;*/
@@ -162,11 +162,11 @@ public:
   
   /// getAttributes - Return the attribute list for this Function.
   ///
-  const AttrListPtr &getAttributes() const { return AttributeList; }
+  const AttributeSet &getAttributes() const { return AttributeList; }
 
   /// setAttributes - Set the attribute list for this Function.
   ///
-  void setAttributes(const AttrListPtr &attrs) { AttributeList = attrs; }
+  void setAttributes(const AttributeSet &attrs) { AttributeList = attrs; }
 
   /// getFnAttributes - Return the function attributes for querying.
   ///
@@ -178,7 +178,7 @@ public:
   ///
   void addFnAttr(Attributes::AttrVal N) { 
     // Function Attributes are stored at ~0 index 
-    addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), N));
+    addAttribute(AttributeSet::FunctionIndex, Attributes::get(getContext(), N));
   }
 
   /// removeFnAttr - Remove function attributes from this function.
diff --git a/include/llvm/GlobalAlias.h b/include/llvm/GlobalAlias.h
index d0f014733f..ff0c1b17f9 100644
--- a/include/llvm/GlobalAlias.h
+++ b/include/llvm/GlobalAlias.h
@@ -15,10 +15,10 @@
 #ifndef LLVM_GLOBAL_ALIAS_H
 #define LLVM_GLOBAL_ALIAS_H
 
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist_node.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/OperandTraits.h"
-#include "llvm/ADT/ilist_node.h"
-#include "llvm/ADT/Twine.h"
 
 namespace llvm {
 
diff --git a/include/llvm/GlobalVariable.h b/include/llvm/GlobalVariable.h
index b9d3f68642..79463fbdf0 100644
--- a/include/llvm/GlobalVariable.h
+++ b/include/llvm/GlobalVariable.h
@@ -20,10 +20,10 @@
 #ifndef LLVM_GLOBAL_VARIABLE_H
 #define LLVM_GLOBAL_VARIABLE_H
 
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist_node.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/OperandTraits.h"
-#include "llvm/ADT/ilist_node.h"
-#include "llvm/ADT/Twine.h"
 
 namespace llvm {
 
diff --git a/include/llvm/IRBuilder.h b/include/llvm/IRBuilder.h
index f63a16051e..bb86875828 100644
--- a/include/llvm/IRBuilder.h
+++ b/include/llvm/IRBuilder.h
@@ -15,13 +15,14 @@
 #ifndef LLVM_IRBUILDER_H
 #define LLVM_IRBUILDER_H
 
-#include "llvm/Instructions.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/DataLayout.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/ConstantFolder.h"
 
 namespace llvm {
@@ -238,7 +239,7 @@ public:
     return Type::getInt16Ty(Context);
   }
 
-  /// getInt32Ty - Fetch the type resepresenting a 32-bit integer.
+  /// getInt32Ty - Fetch the type representing a 32-bit integer.
   IntegerType *getInt32Ty() {
     return Type::getInt32Ty(Context);
   }
@@ -329,7 +330,10 @@ private:
 ///
 /// Note that the builder does not expose the full generality of LLVM
 /// instructions.  For access to extra instruction properties, use the mutators
-/// (e.g. setVolatile) on the instructions after they have been created.
+/// (e.g. setVolatile) on the instructions after they have been
+/// created. Convenience state exists to specify fast-math flags and fp-math
+/// tags.
+///
 /// The first template argument handles whether or not to preserve names in the
 /// final instruction output. This defaults to on.  The second template argument
 /// specifies a class to use for creating constants.  This defaults to creating
@@ -341,36 +345,40 @@ template<bool preserveNames = true, typename T = ConstantFolder,
 class IRBuilder : public IRBuilderBase, public Inserter {
   T Folder;
   MDNode *DefaultFPMathTag;
+  FastMathFlags FMF;
 public:
   IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter(),
             MDNode *FPMathTag = 0)
-    : IRBuilderBase(C), Inserter(I), Folder(F), DefaultFPMathTag(FPMathTag) {
+    : IRBuilderBase(C), Inserter(I), Folder(F), DefaultFPMathTag(FPMathTag),
+      FMF() {
   }
 
-  explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = 0) : IRBuilderBase(C),
-    Folder(), DefaultFPMathTag(FPMathTag) {
+  explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = 0)
+    : IRBuilderBase(C), Folder(), DefaultFPMathTag(FPMathTag), FMF() {
   }
 
   explicit IRBuilder(BasicBlock *TheBB, const T &F, MDNode *FPMathTag = 0)
     : IRBuilderBase(TheBB->getContext()), Folder(F),
-      DefaultFPMathTag(FPMathTag) {
+      DefaultFPMathTag(FPMathTag), FMF() {
     SetInsertPoint(TheBB);
   }
 
   explicit IRBuilder(BasicBlock *TheBB, MDNode *FPMathTag = 0)
     : IRBuilderBase(TheBB->getContext()), Folder(),
-      DefaultFPMathTag(FPMathTag) {
+      DefaultFPMathTag(FPMathTag), FMF() {
     SetInsertPoint(TheBB);
   }
 
   explicit IRBuilder(Instruction *IP, MDNode *FPMathTag = 0)
-    : IRBuilderBase(IP->getContext()), Folder(), DefaultFPMathTag(FPMathTag) {
+    : IRBuilderBase(IP->getContext()), Folder(), DefaultFPMathTag(FPMathTag),
+      FMF() {
     SetInsertPoint(IP);
     SetCurrentDebugLocation(IP->getDebugLoc());
   }
 
   explicit IRBuilder(Use &U, MDNode *FPMathTag = 0)
-    : IRBuilderBase(U->getContext()), Folder(), DefaultFPMathTag(FPMathTag) {
+    : IRBuilderBase(U->getContext()), Folder(), DefaultFPMathTag(FPMathTag),
+      FMF() {
     SetInsertPoint(U);
     SetCurrentDebugLocation(cast<Instruction>(U.getUser())->getDebugLoc());
   }
@@ -378,13 +386,13 @@ public:
   IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F,
             MDNode *FPMathTag = 0)
     : IRBuilderBase(TheBB->getContext()), Folder(F),
-      DefaultFPMathTag(FPMathTag) {
+      DefaultFPMathTag(FPMathTag), FMF() {
     SetInsertPoint(TheBB, IP);
   }
 
   IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, MDNode *FPMathTag = 0)
     : IRBuilderBase(TheBB->getContext()), Folder(),
-      DefaultFPMathTag(FPMathTag) {
+      DefaultFPMathTag(FPMathTag), FMF() {
     SetInsertPoint(TheBB, IP);
   }
 
@@ -394,9 +402,18 @@ public:
   /// getDefaultFPMathTag - Get the floating point math metadata being used.
   MDNode *getDefaultFPMathTag() const { return DefaultFPMathTag; }
 
+  /// Get the flags to be applied to created floating point ops
+  FastMathFlags getFastMathFlags() const { return FMF; }
+
+  /// Clear the fast-math flags.
+  void clearFastMathFlags() { FMF.clear(); }
+
   /// SetDefaultFPMathTag - Set the floating point math metadata to be used.
   void SetDefaultFPMathTag(MDNode *FPMathTag) { DefaultFPMathTag = FPMathTag; }
 
+  /// Set the fast-math flags to be used with generated fp-math operators
+  void SetFastMathFlags(FastMathFlags NewFMF) { FMF = NewFMF; }
+
   /// isNamePreserving - Return true if this builder is configured to actually
   /// add the requested names to IR created through it.
   bool isNamePreserving() const { return preserveNames; }
@@ -535,11 +552,14 @@ private:
     return BO;
   }
 
-  Instruction *AddFPMathTag(Instruction *I, MDNode *FPMathTag) const {
+  Instruction *AddFPMathAttributes(Instruction *I,
+                                   MDNode *FPMathTag,
+                                   FastMathFlags FMF) const {
     if (!FPMathTag)
       FPMathTag = DefaultFPMathTag;
     if (FPMathTag)
       I->setMetadata(LLVMContext::MD_fpmath, FPMathTag);
+    I->setFastMathFlags(FMF);
     return I;
   }
 public:
@@ -562,8 +582,8 @@ public:
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFAdd(LC, RC), Name);
-    return Insert(AddFPMathTag(BinaryOperator::CreateFAdd(LHS, RHS),
-                               FPMathTag), Name);
+    return Insert(AddFPMathAttributes(BinaryOperator::CreateFAdd(LHS, RHS),
+                                      FPMathTag, FMF), Name);
   }
   Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
@@ -584,8 +604,8 @@ public:
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFSub(LC, RC), Name);
-    return Insert(AddFPMathTag(BinaryOperator::CreateFSub(LHS, RHS),
-                               FPMathTag), Name);
+    return Insert(AddFPMathAttributes(BinaryOperator::CreateFSub(LHS, RHS),
+                                      FPMathTag, FMF), Name);
   }
   Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "",
                    bool HasNUW = false, bool HasNSW = false) {
@@ -606,8 +626,8 @@ public:
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFMul(LC, RC), Name);
-    return Insert(AddFPMathTag(BinaryOperator::CreateFMul(LHS, RHS),
-                               FPMathTag), Name);
+    return Insert(AddFPMathAttributes(BinaryOperator::CreateFMul(LHS, RHS),
+                                      FPMathTag, FMF), Name);
   }
   Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "",
                     bool isExact = false) {
@@ -638,8 +658,8 @@ public:
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFDiv(LC, RC), Name);
-    return Insert(AddFPMathTag(BinaryOperator::CreateFDiv(LHS, RHS),
-                               FPMathTag), Name);
+    return Insert(AddFPMathAttributes(BinaryOperator::CreateFDiv(LHS, RHS),
+                                      FPMathTag, FMF), Name);
   }
   Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
@@ -658,8 +678,8 @@ public:
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Insert(Folder.CreateFRem(LC, RC), Name);
-    return Insert(AddFPMathTag(BinaryOperator::CreateFRem(LHS, RHS),
-                               FPMathTag), Name);
+    return Insert(AddFPMathAttributes(BinaryOperator::CreateFRem(LHS, RHS),
+                                      FPMathTag, FMF), Name);
   }
 
   Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "",
@@ -788,7 +808,8 @@ public:
   Value *CreateFNeg(Value *V, const Twine &Name = "", MDNode *FPMathTag = 0) {
     if (Constant *VC = dyn_cast<Constant>(V))
       return Insert(Folder.CreateFNeg(VC), Name);
-    return Insert(AddFPMathTag(BinaryOperator::CreateFNeg(V), FPMathTag), Name);
+    return Insert(AddFPMathAttributes(BinaryOperator::CreateFNeg(V),
+                                      FPMathTag, FMF), Name);
   }
   Value *CreateNot(Value *V, const Twine &Name = "") {
     if (Constant *VC = dyn_cast<Constant>(V))
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index c794308f73..d0b6acff5a 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -110,6 +110,8 @@ void initializeExpandPostRAPass(PassRegistry&);
 void initializePathProfilerPass(PassRegistry&);
 void initializeGCOVProfilerPass(PassRegistry&);
 void initializeAddressSanitizerPass(PassRegistry&);
+void initializeAddressSanitizerModulePass(PassRegistry&);
+void initializeMemorySanitizerPass(PassRegistry&);
 void initializeThreadSanitizerPass(PassRegistry&);
 void initializeEarlyCSEPass(PassRegistry&);
 void initializeExpandISelPseudosPass(PassRegistry&);
@@ -172,7 +174,6 @@ void initializeMachineDominatorTreePass(PassRegistry&);
 void initializeMachinePostDominatorTreePass(PassRegistry&);
 void initializeMachineLICMPass(PassRegistry&);
 void initializeMachineLoopInfoPass(PassRegistry&);
-void initializeMachineLoopRangesPass(PassRegistry&);
 void initializeMachineModuleInfoPass(PassRegistry&);
 void initializeMachineSchedulerPass(PassRegistry&);
 void initializeMachineSinkingPass(PassRegistry&);
diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
index b5e0fd4eff..701134d526 100644
--- a/include/llvm/InlineAsm.h
+++ b/include/llvm/InlineAsm.h
@@ -16,8 +16,8 @@
 #ifndef LLVM_INLINEASM_H
 #define LLVM_INLINEASM_H
 
-#include "llvm/Value.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Value.h"
 #include <vector>
 
 namespace llvm {
diff --git a/include/llvm/Support/InstVisitor.h b/include/llvm/InstVisitor.h
index 6dfb4dec0e..c2d943b902 100644
--- a/include/llvm/Support/InstVisitor.h
+++ b/include/llvm/InstVisitor.h
@@ -1,4 +1,4 @@
-//===- llvm/Support/InstVisitor.h - Define instruction visitors -*- C++ -*-===//
+//===- llvm/InstVisitor.h - Instruction visitor templates -------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -8,13 +8,13 @@
 //===----------------------------------------------------------------------===//
 
 
-#ifndef LLVM_SUPPORT_INSTVISITOR_H
-#define LLVM_SUPPORT_INSTVISITOR_H
+#ifndef LLVM_INSTVISITOR_H
+#define LLVM_INSTVISITOR_H
 
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Module.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h
index da17f3b80d..31a3dc8f01 100644
--- a/include/llvm/InstrTypes.h
+++ b/include/llvm/InstrTypes.h
@@ -16,10 +16,10 @@
 #ifndef LLVM_INSTRUCTION_TYPES_H
 #define LLVM_INSTRUCTION_TYPES_H
 
+#include "llvm/ADT/Twine.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/Instruction.h"
 #include "llvm/OperandTraits.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/ADT/Twine.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Instruction.h b/include/llvm/Instruction.h
index 0768df1684..595ad179e0 100644
--- a/include/llvm/Instruction.h
+++ b/include/llvm/Instruction.h
@@ -15,12 +15,13 @@
 #ifndef LLVM_INSTRUCTION_H
 #define LLVM_INSTRUCTION_H
 
-#include "llvm/User.h"
 #include "llvm/ADT/ilist_node.h"
 #include "llvm/Support/DebugLoc.h"
+#include "llvm/User.h"
 
 namespace llvm {
 
+class FastMathFlags;
 class LLVMContext;
 class MDNode;
 
@@ -176,6 +177,59 @@ public:
   /// getDebugLoc - Return the debug location for this node as a DebugLoc.
   const DebugLoc &getDebugLoc() const { return DbgLoc; }
 
+  /// Set or clear the unsafe-algebra flag on this instruction, which must be an
+  /// operator which supports this flag. See LangRef.html for the meaning of
+  /// this flag.
+  void setHasUnsafeAlgebra(bool B);
+
+  /// Set or clear the no-nans flag on this instruction, which must be an
+  /// operator which supports this flag. See LangRef.html for the meaning of
+  /// this flag.
+  void setHasNoNaNs(bool B);
+
+  /// Set or clear the no-infs flag on this instruction, which must be an
+  /// operator which supports this flag. See LangRef.html for the meaning of
+  /// this flag.
+  void setHasNoInfs(bool B);
+
+  /// Set or clear the no-signed-zeros flag on this instruction, which must be
+  /// an operator which supports this flag. See LangRef.html for the meaning of
+  /// this flag.
+  void setHasNoSignedZeros(bool B);
+
+  /// Set or clear the allow-reciprocal flag on this instruction, which must be
+  /// an operator which supports this flag. See LangRef.html for the meaning of
+  /// this flag.
+  void setHasAllowReciprocal(bool B);
+
+  /// Convenience function for setting all the fast-math flags on this
+  /// instruction, which must be an operator which supports these flags. See
+  /// LangRef.html for the meaning of these flats.
+  void setFastMathFlags(FastMathFlags FMF);
+
+  /// Determine whether the unsafe-algebra flag is set.
+  bool hasUnsafeAlgebra() const;
+
+  /// Determine whether the no-NaNs flag is set.
+  bool hasNoNaNs() const;
+
+  /// Determine whether the no-infs flag is set.
+  bool hasNoInfs() const;
+
+  /// Determine whether the no-signed-zeros flag is set.
+  bool hasNoSignedZeros() const;
+
+  /// Determine whether the allow-reciprocal flag is set.
+  bool hasAllowReciprocal() const;
+
+  /// Convenience function for getting all the fast-math flags, which must be an
+  /// operator which supports these flags. See LangRef.html for the meaning of
+  /// these flats.
+  FastMathFlags getFastMathFlags() const;
+
+  /// Copy I's fast-math flags
+  void copyFastMathFlags(const Instruction *I);
+
 private:
   /// hasMetadataHashEntry - Return true if we have an entry in the on-the-side
   /// metadata hash.
@@ -202,7 +256,7 @@ public:
   ///
   /// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
   ///
-  bool isAssociative() const { return isAssociative(getOpcode()); }
+  bool isAssociative() const;
   static bool isAssociative(unsigned op);
 
   /// isCommutative - Return true if the instruction is commutative:
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index 69593b48c1..9c5a2db2fe 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -16,15 +16,15 @@
 #ifndef LLVM_INSTRUCTIONS_H
 #define LLVM_INSTRUCTIONS_H
 
-#include "llvm/InstrTypes.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Attributes.h"
 #include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/IntegersSubset.h"
 #include "llvm/Support/IntegersSubsetMapping.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
 #include <iterator>
 
 namespace llvm {
@@ -1156,7 +1156,7 @@ public:
 /// hold the calling convention of the call.
 ///
 class CallInst : public Instruction {
-  AttrListPtr AttributeList; ///< parameter attributes for call
+  AttributeSet AttributeList; ///< parameter attributes for call
   CallInst(const CallInst &CI);
   void init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr);
   void init(Value *Func, const Twine &NameStr);
@@ -1254,11 +1254,11 @@ public:
 
   /// getAttributes - Return the parameter attributes for this call.
   ///
-  const AttrListPtr &getAttributes() const { return AttributeList; }
+  const AttributeSet &getAttributes() const { return AttributeList; }
 
   /// setAttributes - Set the parameter attributes for this call.
   ///
-  void setAttributes(const AttrListPtr &Attrs) { AttributeList = Attrs; }
+  void setAttributes(const AttributeSet &Attrs) { AttributeList = Attrs; }
 
   /// addAttribute - adds the attribute to the list of attributes.
   void addAttribute(unsigned i, Attributes attr);
@@ -1280,7 +1280,7 @@ public:
   /// \brief Return true if the call should not be inlined.
   bool isNoInline() const { return hasFnAttr(Attributes::NoInline); }
   void setIsNoInline() {
-    addAttribute(AttrListPtr::FunctionIndex,
+    addAttribute(AttributeSet::FunctionIndex,
                  Attributes::get(getContext(), Attributes::NoInline));
   }
 
@@ -1289,7 +1289,7 @@ public:
     return hasFnAttr(Attributes::ReturnsTwice);
   }
   void setCanReturnTwice() {
-    addAttribute(AttrListPtr::FunctionIndex,
+    addAttribute(AttributeSet::FunctionIndex,
                  Attributes::get(getContext(), Attributes::ReturnsTwice));
   }
 
@@ -1298,7 +1298,7 @@ public:
     return hasFnAttr(Attributes::ReadNone);
   }
   void setDoesNotAccessMemory() {
-    addAttribute(AttrListPtr::FunctionIndex,
+    addAttribute(AttributeSet::FunctionIndex,
                  Attributes::get(getContext(), Attributes::ReadNone));
   }
 
@@ -1307,21 +1307,21 @@ public:
     return doesNotAccessMemory() || hasFnAttr(Attributes::ReadOnly);
   }
   void setOnlyReadsMemory() {
-    addAttribute(AttrListPtr::FunctionIndex,
+    addAttribute(AttributeSet::FunctionIndex,
                  Attributes::get(getContext(), Attributes::ReadOnly));
   }
 
   /// \brief Determine if the call cannot return.
   bool doesNotReturn() const { return hasFnAttr(Attributes::NoReturn); }
   void setDoesNotReturn() {
-    addAttribute(AttrListPtr::FunctionIndex,
+    addAttribute(AttributeSet::FunctionIndex,
                  Attributes::get(getContext(), Attributes::NoReturn));
   }
 
   /// \brief Determine if the call cannot unwind.
   bool doesNotThrow() const { return hasFnAttr(Attributes::NoUnwind); }
   void setDoesNotThrow() {
-    addAttribute(AttrListPtr::FunctionIndex,
+    addAttribute(AttributeSet::FunctionIndex,
                  Attributes::get(getContext(), Attributes::NoUnwind));
   }
 
@@ -2942,7 +2942,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
 /// calling convention of the call.
 ///
 class InvokeInst : public TerminatorInst {
-  AttrListPtr AttributeList;
+  AttributeSet AttributeList;
   InvokeInst(const InvokeInst &BI);
   void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
             ArrayRef<Value *> Args, const Twine &NameStr);
@@ -3003,11 +3003,11 @@ public:
 
   /// getAttributes - Return the parameter attributes for this invoke.
   ///
-  const AttrListPtr &getAttributes() const { return AttributeList; }
+  const AttributeSet &getAttributes() const { return AttributeList; }
 
   /// setAttributes - Set the parameter attributes for this invoke.
   ///
-  void setAttributes(const AttrListPtr &Attrs) { AttributeList = Attrs; }
+  void setAttributes(const AttributeSet &Attrs) { AttributeList = Attrs; }
 
   /// addAttribute - adds the attribute to the list of attributes.
   void addAttribute(unsigned i, Attributes attr);
@@ -3029,7 +3029,7 @@ public:
   /// \brief Return true if the call should not be inlined.
   bool isNoInline() const { return hasFnAttr(Attributes::NoInline); }
   void setIsNoInline() {
-    addAttribute(AttrListPtr::FunctionIndex,
+    addAttribute(AttributeSet::FunctionIndex,
                  Attributes::get(getContext(), Attributes::NoInline));
   }
 
@@ -3038,7 +3038,7 @@ public:
     return hasFnAttr(Attributes::ReadNone);
   }
   void setDoesNotAccessMemory() {
-    addAttribute(AttrListPtr::FunctionIndex,
+    addAttribute(AttributeSet::FunctionIndex,
                  Attributes::get(getContext(), Attributes::ReadNone));
   }
 
@@ -3047,21 +3047,21 @@ public:
     return doesNotAccessMemory() || hasFnAttr(Attributes::ReadOnly);
   }
   void setOnlyReadsMemory() {
-    addAttribute(AttrListPtr::FunctionIndex,
+    addAttribute(AttributeSet::FunctionIndex,
                  Attributes::get(getContext(), Attributes::ReadOnly));
   }
 
   /// \brief Determine if the call cannot return.
   bool doesNotReturn() const { return hasFnAttr(Attributes::NoReturn); }
   void setDoesNotReturn() {
-    addAttribute(AttrListPtr::FunctionIndex,
+    addAttribute(AttributeSet::FunctionIndex,
                  Attributes::get(getContext(), Attributes::NoReturn));
   }
 
   /// \brief Determine if the call cannot unwind.
   bool doesNotThrow() const { return hasFnAttr(Attributes::NoUnwind); }
   void setDoesNotThrow() {
-    addAttribute(AttrListPtr::FunctionIndex,
+    addAttribute(AttributeSet::FunctionIndex,
                  Attributes::get(getContext(), Attributes::NoUnwind));
   }
 
diff --git a/include/llvm/Intrinsics.h b/include/llvm/Intrinsics.h
index 3108a8e525..c1fe4c67c4 100644
--- a/include/llvm/Intrinsics.h
+++ b/include/llvm/Intrinsics.h
@@ -26,7 +26,7 @@ class FunctionType;
 class Function;
 class LLVMContext;
 class Module;
-class AttrListPtr;
+class AttributeSet;
 
 /// Intrinsic Namespace - This namespace contains an enum with a value for
 /// every intrinsic/builtin function known by LLVM.  These enum values are
@@ -58,7 +58,7 @@ namespace Intrinsic {
 
   /// Intrinsic::getAttributes(ID) - Return the attributes for an intrinsic.
   ///
-  AttrListPtr getAttributes(LLVMContext &C, ID id);
+  AttributeSet getAttributes(LLVMContext &C, ID id);
 
   /// Intrinsic::getDeclaration(M, ID) - Create or insert an LLVM Function
   /// declaration for an intrinsic, and return it.
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 806e4b37b7..baf8550edc 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -19,20 +19,20 @@
 #include "llvm/Analysis/DomPrinter.h"
 #include "llvm/Analysis/FindUsedTypes.h"
 #include "llvm/Analysis/IntervalPartition.h"
+#include "llvm/Analysis/Lint.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/RegionPass.h"
 #include "llvm/Analysis/RegionPrinter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/Lint.h"
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Function.h"
-#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Vectorize.h"
 #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/Transforms/Vectorize.h"
 #include <cstdlib>
 
 namespace {
diff --git a/include/llvm/LinkAllVMCore.h b/include/llvm/LinkAllVMCore.h
index 83684c0fb6..5a85c04a18 100644
--- a/include/llvm/LinkAllVMCore.h
+++ b/include/llvm/LinkAllVMCore.h
@@ -16,13 +16,15 @@
 #ifndef LLVM_LINKALLVMCORE_H
 #define LLVM_LINKALLVMCORE_H
 
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Analysis/Verifier.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Memory.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Path.h"
@@ -30,8 +32,6 @@
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/TimeValue.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/MathExtras.h"
 #include <cstdlib>
 
 namespace {
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 29ec1020c3..44087650c2 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -16,8 +16,8 @@
 #ifndef LLVM_TARGET_ASM_INFO_H
 #define LLVM_TARGET_ASM_INFO_H
 
-#include "llvm/MC/MachineLocation.h"
 #include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MachineLocation.h"
 #include <cassert>
 #include <vector>
 
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 5a8830cb66..111ad484ff 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -10,10 +10,10 @@
 #ifndef LLVM_MC_MCCONTEXT_H
 #define LLVM_MC_MCCONTEXT_H
 
-#include "llvm/MC/SectionKind.h"
-#include "llvm/MC/MCDwarf.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/SectionKind.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/raw_ostream.h"
@@ -137,11 +137,16 @@ namespace llvm {
 
     void *MachOUniquingMap, *ELFUniquingMap, *COFFUniquingMap;
 
+    /// Do automatic initialization in constructor and finalization in
+    /// destructor
+    bool AutoInitializationFinalization;
+
     MCSymbol *CreateSymbol(StringRef Name);
 
   public:
     explicit MCContext(const MCAsmInfo &MAI, const MCRegisterInfo &MRI,
-                       const MCObjectFileInfo *MOFI, const SourceMgr *Mgr = 0);
+                       const MCObjectFileInfo *MOFI, const SourceMgr *Mgr = 0,
+                       bool AutoInitializationFinalization = true);
     ~MCContext();
 
     const SourceMgr *getSourceManager() const { return SrcMgr; }
@@ -154,6 +159,17 @@ namespace llvm {
 
     void setAllowTemporaryLabels(bool Value) { AllowTemporaryLabels = Value; }
 
+    /// @name Module Lifetime Management
+    /// @{
+
+    /// doInitialization - prepare to process a new module
+    void doInitialization();
+
+    /// doFinalization - clean up state from the current module
+    void doFinalization();
+
+    /// @}
+
     /// @name Symbol Management
     /// @{
 
diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h
index 53a9ce0a36..392d01592c 100644
--- a/include/llvm/MC/MCDisassembler.h
+++ b/include/llvm/MC/MCDisassembler.h
@@ -9,8 +9,8 @@
 #ifndef MCDISASSEMBLER_H
 #define MCDISASSEMBLER_H
 
-#include "llvm/Support/DataTypes.h"
 #include "llvm-c/Disassembler.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 
diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h
index 92e76121c0..3160b616f7 100644
--- a/include/llvm/MC/MCDwarf.h
+++ b/include/llvm/MC/MCDwarf.h
@@ -16,9 +16,9 @@
 #define LLVM_MC_MCDWARF_H
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/raw_ostream.h"
 #include <vector>
 
 namespace llvm {
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
index 8b0f191792..9842f8aa45 100644
--- a/include/llvm/MC/MCELFObjectWriter.h
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -72,7 +72,7 @@ public:
         // @LOCALMOD-BEGIN
         // This shouldn't be needed anymore (sel_ldr doesn't check for it),
         // but removing it may require some changes in binutils also.
-      case Triple::NativeClient:
+      case Triple::NaCl:
         return ELF::ELFOSABI_NACL;
         // @LOCALMOD-END
       default:
diff --git a/include/llvm/MC/MCELFStreamer.h b/include/llvm/MC/MCELFStreamer.h
new file mode 100644
index 0000000000..abe0f1f924
--- /dev/null
+++ b/include/llvm/MC/MCELFStreamer.h
@@ -0,0 +1,109 @@
+//===- MCELFStreamer.h - MCStreamer ELF Object File Interface ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCELFSTREAMER_H
+#define LLVM_MC_MCELFSTREAMER_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/DataTypes.h"
+
+#include <vector>
+
+namespace llvm {
+class MCAsmBackend;
+class MCAssembler;
+class MCCodeEmitter;
+class MCExpr;
+class MCInst;
+class MCSymbol;
+class MCSymbolData;
+class raw_ostream;
+
+class MCELFStreamer : public MCObjectStreamer {
+public:
+  MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+                raw_ostream &OS, MCCodeEmitter *Emitter)
+    : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+  MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+                raw_ostream &OS, MCCodeEmitter *Emitter,
+                MCAssembler *Assembler)
+    : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {}
+
+  virtual ~MCELFStreamer();
+
+  /// @name MCStreamer Interface
+  /// @{
+
+  virtual void InitSections();
+  virtual void ChangeSection(const MCSection *Section);
+  virtual void EmitLabel(MCSymbol *Symbol);
+  virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+  virtual void EmitThumbFunc(MCSymbol *Func);
+  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+  virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                unsigned ByteAlignment);
+  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+  virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+  virtual void EmitCOFFSymbolType(int Type);
+  virtual void EndCOFFSymbolDef();
+
+  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+
+  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                     unsigned ByteAlignment);
+
+  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+                            uint64_t Size = 0, unsigned ByteAlignment = 0);
+  virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                              uint64_t Size, unsigned ByteAlignment = 0);
+  virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+                             unsigned AddrSpace);
+
+  virtual void EmitFileDirective(StringRef Filename);
+
+  virtual void EmitTCEntry(const MCSymbol &S);
+
+  virtual void FinishImpl();
+  //TEMP LOCALMOD until we go to upstream bundling
+  virtual void EmitAssignment(llvm::MCSymbol*, const llvm::MCExpr*);
+  /// @}
+
+private:
+  virtual void EmitInstToFragment(const MCInst &Inst);
+  virtual void EmitInstToData(const MCInst &Inst);
+
+  void fixSymbolsInTLSFixups(const MCExpr *expr);
+
+  struct LocalCommon {
+    MCSymbolData *SD;
+    uint64_t Size;
+    unsigned ByteAlignment;
+  };
+
+  std::vector<LocalCommon> LocalCommons;
+
+  SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
+
+
+  void SetSection(StringRef Section, unsigned Type, unsigned Flags,
+                  SectionKind Kind);
+  void SetSectionData();
+  void SetSectionText();
+  void SetSectionBss();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 1007aa5264..93872edf2f 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -174,9 +174,13 @@ public:
     VK_PPC_DARWIN_HA16,  // ha16(symbol)
     VK_PPC_DARWIN_LO16,  // lo16(symbol)
     VK_PPC_GAS_HA16,     // symbol@ha
-    VK_PPC_GAS_LO16,      // symbol@l
+    VK_PPC_GAS_LO16,     // symbol@l
     VK_PPC_TPREL16_HA,   // symbol@tprel@ha
     VK_PPC_TPREL16_LO,   // symbol@tprel@l
+    VK_PPC_TOC16_HA,     // symbol@toc@ha
+    VK_PPC_TOC16_LO,     // symbol@toc@l
+    VK_PPC_GOT_TPREL16_DS, // symbol@got@tprel
+    VK_PPC_TLS,            // symbol@tls
 
     VK_Mips_GPREL,
     VK_Mips_GOT_CALL,
diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h
index 3b9420a403..a18cbd94bb 100644
--- a/include/llvm/MC/MCInstPrinter.h
+++ b/include/llvm/MC/MCInstPrinter.h
@@ -10,6 +10,9 @@
 #ifndef LLVM_MC_MCINSTPRINTER_H
 #define LLVM_MC_MCINSTPRINTER_H
 
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Format.h"
+
 namespace llvm {
 class MCInst;
 class raw_ostream;
@@ -36,13 +39,16 @@ protected:
   /// True if we are printing marked up assembly.
   bool UseMarkup;
 
+  /// True if we are printing immediates as hex.
+  bool PrintImmHex;
+
   /// Utility function for printing annotations.
   void printAnnotation(raw_ostream &OS, StringRef Annot);
 public:
   MCInstPrinter(const MCAsmInfo &mai, const MCInstrInfo &mii,
                 const MCRegisterInfo &mri)
     : CommentStream(0), MAI(mai), MII(mii), MRI(mri), AvailableFeatures(0),
-      UseMarkup(0) {}
+      UseMarkup(0), PrintImmHex(0) {}
 
   virtual ~MCInstPrinter();
 
@@ -70,6 +76,12 @@ public:
   /// Utility functions to make adding mark ups simpler.
   StringRef markup(StringRef s) const;
   StringRef markup(StringRef a, StringRef b) const;
+
+  bool getPrintImmHex() const { return PrintImmHex; }
+  void setPrintImmHex(bool Value) { PrintImmHex = Value; }
+
+  /// Utility function to print immediates in decimal or hex.
+  format_object1<int64_t> formatImm(const int64_t Value) const;
 };
 
 } // namespace llvm
diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h
index 23e5513ae3..dcde9d9049 100644
--- a/include/llvm/MC/MCObjectFileInfo.h
+++ b/include/llvm/MC/MCObjectFileInfo.h
@@ -84,15 +84,6 @@ protected:
   /// this is the section to emit them into.
   const MCSection *CompactUnwindSection;
 
-  /// DwarfAccelNamesSection, DwarfAccelObjCSection,
-  /// DwarfAccelNamespaceSection, DwarfAccelTypesSection -
-  /// If we use the DWARF accelerated hash tables then we want toe emit these
-  /// sections.
-  const MCSection *DwarfAccelNamesSection;
-  const MCSection *DwarfAccelObjCSection;
-  const MCSection *DwarfAccelNamespaceSection;
-  const MCSection *DwarfAccelTypesSection;
-
   // Dwarf sections for debug info.  If a target supports debug info, these must
   // be set.
   const MCSection *DwarfAbbrevSection;
@@ -107,6 +98,23 @@ protected:
   const MCSection *DwarfRangesSection;
   const MCSection *DwarfMacroInfoSection;
 
+  // DWARF5 Experimental Debug Info Sections
+  /// DwarfAccelNamesSection, DwarfAccelObjCSection,
+  /// DwarfAccelNamespaceSection, DwarfAccelTypesSection -
+  /// If we use the DWARF accelerated hash tables then we want to emit these
+  /// sections.
+  const MCSection *DwarfAccelNamesSection;
+  const MCSection *DwarfAccelObjCSection;
+  const MCSection *DwarfAccelNamespaceSection;
+  const MCSection *DwarfAccelTypesSection;
+
+  /// These are used for the Fission separate debug information files.
+  const MCSection *DwarfInfoDWOSection;
+  const MCSection *DwarfAbbrevDWOSection;
+  const MCSection *DwarfStrDWOSection;
+  const MCSection *DwarfLineDWOSection;
+  const MCSection *DwarfLocDWOSection;
+
   // Extra TLS Variable Data section.  If the target needs to put additional
   // information for a TLS variable, it'll go here.
   const MCSection *TLSExtraDataSection;
@@ -195,18 +203,6 @@ public:
   const MCSection *getCompactUnwindSection() const{
     return CompactUnwindSection;
   }
-  const MCSection *getDwarfAccelNamesSection() const {
-    return DwarfAccelNamesSection;
-  }
-  const MCSection *getDwarfAccelObjCSection() const {
-    return DwarfAccelObjCSection;
-  }
-  const MCSection *getDwarfAccelNamespaceSection() const {
-    return DwarfAccelNamespaceSection;
-  }
-  const MCSection *getDwarfAccelTypesSection() const {
-    return DwarfAccelTypesSection;
-  }
   const MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
   const MCSection *getDwarfInfoSection() const { return DwarfInfoSection; }
   const MCSection *getDwarfLineSection() const { return DwarfLineSection; }
@@ -222,6 +218,36 @@ public:
   const MCSection *getDwarfMacroInfoSection() const {
     return DwarfMacroInfoSection;
   }
+
+  // DWARF5 Experimental Debug Info Sections
+  const MCSection *getDwarfAccelNamesSection() const {
+    return DwarfAccelNamesSection;
+  }
+  const MCSection *getDwarfAccelObjCSection() const {
+    return DwarfAccelObjCSection;
+  }
+  const MCSection *getDwarfAccelNamespaceSection() const {
+    return DwarfAccelNamespaceSection;
+  }
+  const MCSection *getDwarfAccelTypesSection() const {
+    return DwarfAccelTypesSection;
+  }
+  const MCSection *getDwarfInfoDWOSection() const {
+    return DwarfInfoDWOSection;
+  }
+  const MCSection *getDwarfAbbrevDWOSection() const {
+    return DwarfAbbrevDWOSection;
+  }
+  const MCSection *getDwarfStrDWOSection() const {
+    return DwarfStrDWOSection;
+  }
+  const MCSection *getDwarfLineDWOSection() const {
+    return DwarfLineDWOSection;
+  }
+  const MCSection *getDwarfLocDWOSection() const {
+    return DwarfLocDWOSection;
+  }
+
   const MCSection *getTLSExtraDataSection() const {
     return TLSExtraDataSection;
   }
diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h
index 14fe75fd4c..f77b7d853d 100644
--- a/include/llvm/MC/MCObjectWriter.h
+++ b/include/llvm/MC/MCObjectWriter.h
@@ -10,9 +10,9 @@
 #ifndef LLVM_MC_MCOBJECTWRITER_H
 #define LLVM_MC_MCOBJECTWRITER_H
 
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 
 namespace llvm {
@@ -173,7 +173,13 @@ public:
     OS << StringRef(Zeros, N % 16);
   }
 
+  void WriteBytes(SmallVectorImpl<char> &ByteVec, unsigned ZeroFillSize = 0) {
+    WriteBytes(StringRef(ByteVec.data(), ByteVec.size()), ZeroFillSize);
+  }
+
   void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
+    // TODO: this version may need to go away once all fragment contents are
+    // converted to SmallVector<char, N>
     assert((ZeroFillSize == 0 || Str.size () <= ZeroFillSize) &&
       "data size greater than fill size, unexpected large write will occur");
     OS << Str;
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index a71d3c3217..b9490fab60 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_MC_MCASMPARSER_H
 #define LLVM_MC_MCASMPARSER_H
 
-#include "llvm/Support/DataTypes.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 class AsmToken;
diff --git a/include/llvm/MC/MCParser/MCAsmParserExtension.h b/include/llvm/MC/MCParser/MCAsmParserExtension.h
index 0918c93bdf..84b33b59cd 100644
--- a/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_MC_MCASMPARSEREXTENSION_H
 #define LLVM_MC_MCASMPARSEREXTENSION_H
 
-#include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/Support/SMLoc.h"
 
 namespace llvm {
diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h
index f05baeaaf6..7118127e5f 100644
--- a/include/llvm/MC/MCRegisterInfo.h
+++ b/include/llvm/MC/MCRegisterInfo.h
@@ -22,11 +22,15 @@
 
 namespace llvm {
 
+/// An unsigned integer type large enough to represent all physical registers,
+/// but not necessarily virtual registers.
+typedef uint16_t MCPhysReg;
+
 /// MCRegisterClass - Base class of TargetRegisterClass.
 class MCRegisterClass {
 public:
-  typedef const uint16_t* iterator;
-  typedef const uint16_t* const_iterator;
+  typedef const MCPhysReg* iterator;
+  typedef const MCPhysReg* const_iterator;
 
   const char *Name;
   const iterator RegsBegin;
@@ -152,7 +156,7 @@ private:
   unsigned NumClasses;                        // Number of entries in the array
   unsigned NumRegUnits;                       // Number of regunits.
   const uint16_t (*RegUnitRoots)[2];          // Pointer to regunit root table.
-  const uint16_t *DiffLists;                  // Pointer to the difflists array
+  const MCPhysReg *DiffLists;                 // Pointer to the difflists array
   const char *RegStrings;                     // Pointer to the string table.
   const uint16_t *SubRegIndices;              // Pointer to the subreg lookup
                                               // array.
@@ -177,7 +181,7 @@ public:
   /// defined below.
   class DiffListIterator {
     uint16_t Val;
-    const uint16_t *List;
+    const MCPhysReg *List;
 
   protected:
     /// Create an invalid iterator. Call init() to point to something useful.
@@ -186,7 +190,7 @@ public:
     /// init - Point the iterator to InitVal, decoding subsequent values from
     /// DiffList. The iterator will initially point to InitVal, sub-classes are
     /// responsible for skipping the seed value if it is not part of the list.
-    void init(uint16_t InitVal, const uint16_t *DiffList) {
+    void init(MCPhysReg InitVal, const MCPhysReg *DiffList) {
       Val = InitVal;
       List = DiffList;
     }
@@ -196,7 +200,7 @@ public:
     /// is the caller's responsibility (by checking for a 0 return value).
     unsigned advance() {
       assert(isValid() && "Cannot move off the end of the list.");
-      uint16_t D = *List++;
+      MCPhysReg D = *List++;
       Val += D;
       return D;
     }
@@ -231,7 +235,7 @@ public:
                           const MCRegisterClass *C, unsigned NC,
                           const uint16_t (*RURoots)[2],
                           unsigned NRU,
-                          const uint16_t *DL,
+                          const MCPhysReg *DL,
                           const char *Strings,
                           const uint16_t *SubIndices,
                           unsigned NumIndices,
diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h
index b050c0f442..83bc63e652 100644
--- a/include/llvm/MC/MCSectionCOFF.h
+++ b/include/llvm/MC/MCSectionCOFF.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_MC_MCSECTIONCOFF_H
 #define LLVM_MC_MCSECTIONCOFF_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/Support/COFF.h"
-#include "llvm/ADT/StringRef.h"
 
 namespace llvm {
 
diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h
index 4d54465760..329c75cb1d 100644
--- a/include/llvm/MC/MCSectionELF.h
+++ b/include/llvm/MC/MCSectionELF.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_MC_MCSECTIONELF_H
 #define LLVM_MC_MCSECTIONELF_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/Support/ELF.h"
-#include "llvm/ADT/StringRef.h"
 
 namespace llvm {
 
diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h
index 71ea8f3e90..65ad7961b3 100644
--- a/include/llvm/MC/MCSectionMachO.h
+++ b/include/llvm/MC/MCSectionMachO.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_MC_MCSECTIONMACHO_H
 #define LLVM_MC_MCSECTIONMACHO_H
 
-#include "llvm/MC/MCSection.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCSection.h"
 
 namespace llvm {
 
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 10cb5580fe..4bd05a5e73 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -14,12 +14,12 @@
 #ifndef LLVM_MC_MCSTREAMER_H
 #define LLVM_MC_MCSTREAMER_H
 
-#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCWin64EH.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
   class MCAsmBackend;
@@ -70,6 +70,8 @@ namespace llvm {
     SmallVector<std::pair<const MCSection *,
                 const MCSection *>, 4> SectionStack;
 
+    bool AutoInitSections;
+
   protected:
     MCStreamer(MCContext &Ctx);
 
@@ -214,6 +216,17 @@ namespace llvm {
         SectionStack.back().first = Section;
     }
 
+    /// Initialize the streamer.
+    void InitStreamer() {
+      if (AutoInitSections)
+        InitSections();
+    }
+
+    /// Tell this MCStreamer to call InitSections upon initialization.
+    void setAutoInitSections(bool AutoInitSections) {
+      this->AutoInitSections = AutoInitSections;
+    }
+
     /// InitSections - Create the default sections and set the initial one.
     virtual void InitSections() = 0;
 
diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h
index 69213cd77d..346fb2df0f 100644
--- a/include/llvm/MC/MCSubtargetInfo.h
+++ b/include/llvm/MC/MCSubtargetInfo.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_MC_MCSUBTARGET_H
 #define LLVM_MC_MCSUBTARGET_H
 
-#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include <string>
 
 namespace llvm {
diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h
index f9af8bcfbf..a4e7301114 100644
--- a/include/llvm/MC/MCValue.h
+++ b/include/llvm/MC/MCValue.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_MC_MCVALUE_H
 #define LLVM_MC_MCVALUE_H
 
-#include "llvm/Support/DataTypes.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/DataTypes.h"
 #include <cassert>
 
 namespace llvm {
diff --git a/include/llvm/MC/MCWinCOFFObjectWriter.h b/include/llvm/MC/MCWinCOFFObjectWriter.h
index 7a0b1ffaf0..11df5749d4 100644
--- a/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -11,6 +11,9 @@
 #define LLVM_MC_MCWINCOFFOBJECTWRITER_H
 
 namespace llvm {
+  class MCObjectWriter;
+  class raw_ostream;
+
   class MCWinCOFFObjectTargetWriter {
     const unsigned Machine;
 
diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h
index 57f0518cbf..37ae03b45c 100644
--- a/include/llvm/MC/SubtargetFeature.h
+++ b/include/llvm/MC/SubtargetFeature.h
@@ -18,9 +18,9 @@
 #ifndef LLVM_MC_SUBTARGETFEATURE_H
 #define LLVM_MC_SUBTARGETFEATURE_H
 
-#include <vector>
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/DataTypes.h"
+#include <vector>
 
 namespace llvm {
   class raw_ostream;
diff --git a/include/llvm/MDBuilder.h b/include/llvm/MDBuilder.h
index 1867a63923..c0f0ae67d9 100644
--- a/include/llvm/MDBuilder.h
+++ b/include/llvm/MDBuilder.h
@@ -15,11 +15,11 @@
 #ifndef LLVM_MDBUILDER_H
 #define LLVM_MDBUILDER_H
 
+#include "llvm/ADT/APInt.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Metadata.h"
-#include "llvm/ADT/APInt.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h
index 0fbbb95988..b2dbcfdc96 100644
--- a/include/llvm/Metadata.h
+++ b/include/llvm/Metadata.h
@@ -16,10 +16,10 @@
 #ifndef LLVM_METADATA_H
 #define LLVM_METADATA_H
 
-#include "llvm/Value.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/ilist_node.h"
+#include "llvm/Value.h"
 
 namespace llvm {
 class Constant;
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
index 13b56433dc..bf9ac51693 100644
--- a/include/llvm/Module.h
+++ b/include/llvm/Module.h
@@ -15,13 +15,12 @@
 #ifndef LLVM_MODULE_H
 #define LLVM_MODULE_H
 
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
 #include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/Metadata.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/DataTypes.h"
-#include <vector>
 
 namespace llvm {
 
@@ -122,9 +121,6 @@ public:
   /// The type for the list of named metadata.
   typedef ilist<NamedMDNode> NamedMDListType;
 
-  /// The type for the list of dependent libraries.
-  typedef std::vector<std::string> LibraryListType;
-
   /// The Global Variable iterator.
   typedef GlobalListType::iterator                      global_iterator;
   /// The Global Variable constant iterator.
@@ -144,8 +140,6 @@ public:
   typedef NamedMDListType::iterator             named_metadata_iterator;
   /// The named metadata constant interators.
   typedef NamedMDListType::const_iterator const_named_metadata_iterator;
-  /// The Library list iterator.
-  typedef LibraryListType::const_iterator lib_iterator;
 
   /// An enumeration for describing the endianess of the target machine.
   enum Endianness  { AnyEndianness, LittleEndian, BigEndian };
@@ -211,7 +205,6 @@ private:
   GlobalListType GlobalList;      ///< The Global Variables in the module
   FunctionListType FunctionList;  ///< The Functions in the module
   AliasListType AliasList;        ///< The Aliases in the module
-  LibraryListType LibraryList;    ///< The Libraries needed by the module
   NamedMDListType NamedMDList;    ///< The named metadata in the module
   std::string GlobalScopeAsm;     ///< Inline Asm at global scope.
   ValueSymbolTable *ValSymTab;    ///< Symbol table for values
@@ -368,7 +361,7 @@ public:
   ///   4. Finally, the function exists but has the wrong prototype: return the
   ///      function with a constantexpr cast to the right prototype.
   Constant *getOrInsertFunction(StringRef Name, FunctionType *T,
-                                AttrListPtr AttributeList);
+                                AttributeSet AttributeList);
 
   Constant *getOrInsertFunction(StringRef Name, FunctionType *T);
 
@@ -380,7 +373,7 @@ public:
   /// null terminated list of function arguments, which makes it easier for
   /// clients to use.
   Constant *getOrInsertFunction(StringRef Name,
-                                AttrListPtr AttributeList,
+                                AttributeSet AttributeList,
                                 Type *RetTy, ...)  END_WITH_NULL;
 
   /// getOrInsertFunction - Same as above, but without the attributes.
@@ -389,7 +382,7 @@ public:
 
   Constant *getOrInsertTargetIntrinsic(StringRef Name,
                                        FunctionType *Ty,
-                                       AttrListPtr AttributeList);
+                                       AttributeSet AttributeList);
 
   /// getFunction - Look up the specified function in the module symbol table.
   /// If it does not exist, return null.
@@ -576,23 +569,6 @@ public:
   bool                    empty() const { return FunctionList.empty(); }
 
 /// @}
-/// @name Dependent Library Iteration
-/// @{
-
-  /// @brief Get a constant iterator to beginning of dependent library list.
-  inline lib_iterator lib_begin() const { return LibraryList.begin(); }
-  /// @brief Get a constant iterator to end of dependent library list.
-  inline lib_iterator lib_end()   const { return LibraryList.end();   }
-  /// @brief Returns the number of items in the list of libraries.
-  inline size_t       lib_size()  const { return LibraryList.size();  }
-  /// @brief Add a library to the list of dependent libraries
-  void addLibrary(StringRef Lib);
-  /// @brief Remove a library from the list of dependent libraries
-  void removeLibrary(StringRef Lib);
-  /// @brief Get all the libraries
-  inline const LibraryListType& getLibraries() const { return LibraryList; }
-
-/// @}
 /// @name Alias Iteration
 /// @{
 
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index 9cd587195a..8046efda8e 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_OBJECT_ARCHIVE_H
 #define LLVM_OBJECT_ARCHIVE_H
 
-#include "llvm/Object/Binary.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Object/Binary.h"
 #include "llvm/Support/DataTypes.h"
 
 namespace llvm {
diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h
index a2723c77e9..1634789809 100644
--- a/include/llvm/Object/ELF.h
+++ b/include/llvm/Object/ELF.h
@@ -14,11 +14,11 @@
 #ifndef LLVM_OBJECT_ELF_H
 #define LLVM_OBJECT_ELF_H
 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PointerIntPair.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ELF.h"
diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h
index 4e03daab16..da972a2433 100644
--- a/include/llvm/Object/MachO.h
+++ b/include/llvm/Object/MachO.h
@@ -15,11 +15,11 @@
 #ifndef LLVM_OBJECT_MACHO_H
 #define LLVM_OBJECT_MACHO_H
 
-#include "llvm/Object/ObjectFile.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Object/MachOObject.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/MachO.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallVector.h"
 
 namespace llvm {
 namespace object {
@@ -44,7 +44,7 @@ public:
   virtual unsigned getArch() const;
   virtual StringRef getLoadName() const;
 
-  MachOObject *getObject() { return MachOObj; }
+  MachOObject *getObject() { return MachOObj.get(); }
 
   static inline bool classof(const Binary *v) {
     return v->isMachO();
@@ -104,7 +104,7 @@ protected:
   virtual error_code getLibraryPath(DataRefImpl LibData, StringRef &Res) const;
 
 private:
-  MachOObject *MachOObj;
+  OwningPtr<MachOObject> MachOObj;
   mutable uint32_t RegisteredStringTable;
   typedef SmallVector<DataRefImpl, 1> SectionList;
   SectionList Sections;
diff --git a/include/llvm/Object/MachOObject.h b/include/llvm/Object/MachOObject.h
index 86f150a294..e32a85d140 100644
--- a/include/llvm/Object/MachOObject.h
+++ b/include/llvm/Object/MachOObject.h
@@ -10,11 +10,11 @@
 #ifndef LLVM_OBJECT_MACHOOBJECT_H
 #define LLVM_OBJECT_MACHOOBJECT_H
 
-#include <string>
 #include "llvm/ADT/InMemoryStruct.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Object/MachOFormat.h"
+#include <string>
 
 namespace llvm {
 
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 1a3120ab8b..e63f554d91 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_OBJECT_OBJECT_FILE_H
 #define LLVM_OBJECT_OBJECT_FILE_H
 
-#include "llvm/Object/Binary.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Object/Binary.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
diff --git a/include/llvm/Object/RelocVisitor.h b/include/llvm/Object/RelocVisitor.h
index 1370c71a7d..9b12946c3a 100644
--- a/include/llvm/Object/RelocVisitor.h
+++ b/include/llvm/Object/RelocVisitor.h
@@ -16,11 +16,11 @@
 #ifndef _LLVM_OBJECT_RELOCVISITOR
 #define _LLVM_OBJECT_RELOCVISITOR
 
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Object/ELF.h"
-#include "llvm/ADT/StringRef.h"
 
 namespace llvm {
 namespace object {
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
index 4f89377a89..d31e09e2b4 100644
--- a/include/llvm/Operator.h
+++ b/include/llvm/Operator.h
@@ -164,10 +164,133 @@ public:
   }
 };
 
+/// Convenience struct for specifying and reasoning about fast-math flags.
+class FastMathFlags {
+private:
+  friend class FPMathOperator;
+  unsigned Flags;
+  FastMathFlags(unsigned F) : Flags(F) { }
+
+public:
+  enum {
+    UnsafeAlgebra   = (1 << 0),
+    NoNaNs          = (1 << 1),
+    NoInfs          = (1 << 2),
+    NoSignedZeros   = (1 << 3),
+    AllowReciprocal = (1 << 4)
+  };
+
+  FastMathFlags() : Flags(0)
+  { }
+
+  /// Whether any flag is set
+  bool any() { return Flags != 0; }
+
+  /// Set all the flags to false
+  void clear() { Flags = 0; }
+
+  /// Flag queries
+  bool noNaNs()          { return 0 != (Flags & NoNaNs); }
+  bool noInfs()          { return 0 != (Flags & NoInfs); }
+  bool noSignedZeros()   { return 0 != (Flags & NoSignedZeros); }
+  bool allowReciprocal() { return 0 != (Flags & AllowReciprocal); }
+  bool unsafeAlgebra()   { return 0 != (Flags & UnsafeAlgebra); }
+
+  /// Flag setters
+  void setNoNaNs()          { Flags |= NoNaNs; }
+  void setNoInfs()          { Flags |= NoInfs; }
+  void setNoSignedZeros()   { Flags |= NoSignedZeros; }
+  void setAllowReciprocal() { Flags |= AllowReciprocal; }
+  void setUnsafeAlgebra() {
+    Flags |= UnsafeAlgebra;
+    setNoNaNs();
+    setNoInfs();
+    setNoSignedZeros();
+    setAllowReciprocal();
+  }
+};
+
+
 /// FPMathOperator - Utility class for floating point operations which can have
 /// information about relaxed accuracy requirements attached to them.
 class FPMathOperator : public Operator {
+private:
+  friend class Instruction;
+
+  void setHasUnsafeAlgebra(bool B) {
+    SubclassOptionalData =
+      (SubclassOptionalData & ~FastMathFlags::UnsafeAlgebra) |
+      (B * FastMathFlags::UnsafeAlgebra);
+
+    // Unsafe algebra implies all the others
+    if (B) {
+      setHasNoNaNs(true);
+      setHasNoInfs(true);
+      setHasNoSignedZeros(true);
+      setHasAllowReciprocal(true);
+    }
+  }
+  void setHasNoNaNs(bool B) {
+    SubclassOptionalData =
+      (SubclassOptionalData & ~FastMathFlags::NoNaNs) |
+      (B * FastMathFlags::NoNaNs);
+  }
+  void setHasNoInfs(bool B) {
+    SubclassOptionalData =
+      (SubclassOptionalData & ~FastMathFlags::NoInfs) |
+      (B * FastMathFlags::NoInfs);
+  }
+  void setHasNoSignedZeros(bool B) {
+    SubclassOptionalData =
+      (SubclassOptionalData & ~FastMathFlags::NoSignedZeros) |
+      (B * FastMathFlags::NoSignedZeros);
+  }
+  void setHasAllowReciprocal(bool B) {
+    SubclassOptionalData =
+      (SubclassOptionalData & ~FastMathFlags::AllowReciprocal) |
+      (B * FastMathFlags::AllowReciprocal);
+  }
+
+  /// Convenience function for setting all the fast-math flags
+  void setFastMathFlags(FastMathFlags FMF) {
+    SubclassOptionalData |= FMF.Flags;
+  }
+
 public:
+  /// Test whether this operation is permitted to be
+  /// algebraically transformed, aka the 'A' fast-math property.
+  bool hasUnsafeAlgebra() const {
+    return (SubclassOptionalData & FastMathFlags::UnsafeAlgebra) != 0;
+  }
+
+  /// Test whether this operation's arguments and results are to be
+  /// treated as non-NaN, aka the 'N' fast-math property.
+  bool hasNoNaNs() const {
+    return (SubclassOptionalData & FastMathFlags::NoNaNs) != 0;
+  }
+
+  /// Test whether this operation's arguments and results are to be
+  /// treated as NoN-Inf, aka the 'I' fast-math property.
+  bool hasNoInfs() const {
+    return (SubclassOptionalData & FastMathFlags::NoInfs) != 0;
+  }
+
+  /// Test whether this operation can treat the sign of zero
+  /// as insignificant, aka the 'S' fast-math property.
+  bool hasNoSignedZeros() const {
+    return (SubclassOptionalData & FastMathFlags::NoSignedZeros) != 0;
+  }
+
+  /// Test whether this operation is permitted to use
+  /// reciprocal instead of division, aka the 'R' fast-math property.
+  bool hasAllowReciprocal() const {
+    return (SubclassOptionalData & FastMathFlags::AllowReciprocal) != 0;
+  }
+
+  /// Convenience function for getting all the fast-math flags
+  FastMathFlags getFastMathFlags() const {
+    return FastMathFlags(SubclassOptionalData);
+  }
 
   /// \brief Get the maximum error permitted by this operation in ULPs.  An
   /// accuracy of 0.0 means that the operation should be performed with the
diff --git a/include/llvm/Option/Arg.h b/include/llvm/Option/Arg.h
new file mode 100644
index 0000000000..baa4b6a100
--- /dev/null
+++ b/include/llvm/Option/Arg.h
@@ -0,0 +1,132 @@
+//===--- Arg.h - Parsed Argument Classes ------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Defines the llvm::Arg class for parsed arguments.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ARG_H_
+#define LLVM_SUPPORT_ARG_H_
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Option/Option.h"
+#include <string>
+
+namespace llvm {
+namespace opt {
+class ArgList;
+
+/// \brief A concrete instance of a particular driver option.
+///
+/// The Arg class encodes just enough information to be able to
+/// derive the argument values efficiently. In addition, Arg
+/// instances have an intrusive double linked list which is used by
+/// ArgList to provide efficient iteration over all instances of a
+/// particular option.
+class Arg {
+  Arg(const Arg &) LLVM_DELETED_FUNCTION;
+  void operator=(const Arg &) LLVM_DELETED_FUNCTION;
+
+private:
+  /// \brief The option this argument is an instance of.
+  const Option Opt;
+
+  /// \brief The argument this argument was derived from (during tool chain
+  /// argument translation), if any.
+  const Arg *BaseArg;
+
+  /// \brief How this instance of the option was spelled.
+  StringRef Spelling;
+
+  /// \brief The index at which this argument appears in the containing
+  /// ArgList.
+  unsigned Index;
+
+  /// \brief Was this argument used to effect compilation?
+  ///
+  /// This is used for generating "argument unused" diagnostics.
+  mutable unsigned Claimed : 1;
+
+  /// \brief Does this argument own its values?
+  mutable unsigned OwnsValues : 1;
+
+  /// \brief The argument values, as C strings.
+  SmallVector<const char *, 2> Values;
+
+public:
+  Arg(const Option Opt, StringRef Spelling, unsigned Index,
+      const Arg *BaseArg = 0);
+  Arg(const Option Opt, StringRef Spelling, unsigned Index,
+      const char *Value0, const Arg *BaseArg = 0);
+  Arg(const Option Opt, StringRef Spelling, unsigned Index,
+      const char *Value0, const char *Value1, const Arg *BaseArg = 0);
+  ~Arg();
+
+  const Option getOption() const { return Opt; }
+  StringRef getSpelling() const { return Spelling; }
+  unsigned getIndex() const { return Index; }
+
+  /// \brief Return the base argument which generated this arg.
+  ///
+  /// This is either the argument itself or the argument it was
+  /// derived from during tool chain specific argument translation.
+  const Arg &getBaseArg() const {
+    return BaseArg ? *BaseArg : *this;
+  }
+  void setBaseArg(const Arg *_BaseArg) {
+    BaseArg = _BaseArg;
+  }
+
+  bool getOwnsValues() const { return OwnsValues; }
+  void setOwnsValues(bool Value) const { OwnsValues = Value; }
+
+  bool isClaimed() const { return getBaseArg().Claimed; }
+
+  /// \brief Set the Arg claimed bit.
+  void claim() const { getBaseArg().Claimed = true; }
+
+  unsigned getNumValues() const { return Values.size(); }
+  const char *getValue(unsigned N = 0) const {
+    return Values[N];
+  }
+
+  SmallVectorImpl<const char*> &getValues() {
+    return Values;
+  }
+
+  bool containsValue(StringRef Value) const {
+    for (unsigned i = 0, e = getNumValues(); i != e; ++i)
+      if (Values[i] == Value)
+        return true;
+    return false;
+  }
+
+  /// \brief Append the argument onto the given array as strings.
+  void render(const ArgList &Args, ArgStringList &Output) const;
+
+  /// \brief Append the argument, render as an input, onto the given
+  /// array as strings.
+  ///
+  /// The distinction is that some options only render their values
+  /// when rendered as a input (e.g., Xlinker).
+  void renderAsInput(const ArgList &Args, ArgStringList &Output) const;
+
+  void dump() const;
+
+  /// \brief Return a formatted version of the argument and
+  /// its values, for debugging and diagnostics.
+  std::string getAsString(const ArgList &Args) const;
+};
+
+} // end namespace opt
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h
new file mode 100644
index 0000000000..7d09f8e6cc
--- /dev/null
+++ b/include/llvm/Option/ArgList.h
@@ -0,0 +1,415 @@
+//===--- ArgList.h - Argument List Management -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ARGLIST_H_
+#define LLVM_SUPPORT_ARGLIST_H_
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Option/OptSpecifier.h"
+
+#include <list>
+#include <string>
+#include <vector>
+
+namespace llvm {
+namespace opt {
+class Arg;
+class ArgList;
+class Option;
+
+/// arg_iterator - Iterates through arguments stored inside an ArgList.
+class arg_iterator {
+  /// The current argument.
+  SmallVectorImpl<Arg*>::const_iterator Current;
+
+  /// The argument list we are iterating over.
+  const ArgList &Args;
+
+  /// Optional filters on the arguments which will be match. Most clients
+  /// should never want to iterate over arguments without filters, so we won't
+  /// bother to factor this into two separate iterator implementations.
+  //
+  // FIXME: Make efficient; the idea is to provide efficient iteration over
+  // all arguments which match a particular id and then just provide an
+  // iterator combinator which takes multiple iterators which can be
+  // efficiently compared and returns them in order.
+  OptSpecifier Id0, Id1, Id2;
+
+  void SkipToNextArg();
+
+public:
+  typedef Arg * const *                 value_type;
+  typedef Arg * const &                 reference;
+  typedef Arg * const *                 pointer;
+  typedef std::forward_iterator_tag   iterator_category;
+  typedef std::ptrdiff_t              difference_type;
+
+  arg_iterator(SmallVectorImpl<Arg*>::const_iterator it,
+                const ArgList &_Args, OptSpecifier _Id0 = 0U,
+                OptSpecifier _Id1 = 0U, OptSpecifier _Id2 = 0U)
+    : Current(it), Args(_Args), Id0(_Id0), Id1(_Id1), Id2(_Id2) {
+    SkipToNextArg();
+  }
+
+  operator const Arg*() { return *Current; }
+  reference operator*() const { return *Current; }
+  pointer operator->() const { return Current; }
+
+  arg_iterator &operator++() {
+    ++Current;
+    SkipToNextArg();
+    return *this;
+  }
+
+  arg_iterator operator++(int) {
+    arg_iterator tmp(*this);
+    ++(*this);
+    return tmp;
+  }
+
+  friend bool operator==(arg_iterator LHS, arg_iterator RHS) {
+    return LHS.Current == RHS.Current;
+  }
+  friend bool operator!=(arg_iterator LHS, arg_iterator RHS) {
+    return !(LHS == RHS);
+  }
+};
+
+/// ArgList - Ordered collection of driver arguments.
+///
+/// The ArgList class manages a list of Arg instances as well as
+/// auxiliary data and convenience methods to allow Tools to quickly
+/// check for the presence of Arg instances for a particular Option
+/// and to iterate over groups of arguments.
+class ArgList {
+private:
+  ArgList(const ArgList &) LLVM_DELETED_FUNCTION;
+  void operator=(const ArgList &) LLVM_DELETED_FUNCTION;
+
+public:
+  typedef SmallVector<Arg*, 16> arglist_type;
+  typedef arglist_type::iterator iterator;
+  typedef arglist_type::const_iterator const_iterator;
+  typedef arglist_type::reverse_iterator reverse_iterator;
+  typedef arglist_type::const_reverse_iterator const_reverse_iterator;
+
+private:
+  /// The internal list of arguments.
+  arglist_type Args;
+
+protected:
+  ArgList();
+
+public:
+  virtual ~ArgList();
+
+  /// @name Arg Access
+  /// @{
+
+  /// append - Append \p A to the arg list.
+  void append(Arg *A);
+
+  arglist_type &getArgs() { return Args; }
+  const arglist_type &getArgs() const { return Args; }
+
+  unsigned size() const { return Args.size(); }
+
+  /// @}
+  /// @name Arg Iteration
+  /// @{
+
+  iterator begin() { return Args.begin(); }
+  iterator end() { return Args.end(); }
+
+  reverse_iterator rbegin() { return Args.rbegin(); }
+  reverse_iterator rend() { return Args.rend(); }
+
+  const_iterator begin() const { return Args.begin(); }
+  const_iterator end() const { return Args.end(); }
+
+  const_reverse_iterator rbegin() const { return Args.rbegin(); }
+  const_reverse_iterator rend() const { return Args.rend(); }
+
+  arg_iterator filtered_begin(OptSpecifier Id0 = 0U, OptSpecifier Id1 = 0U,
+                              OptSpecifier Id2 = 0U) const {
+    return arg_iterator(Args.begin(), *this, Id0, Id1, Id2);
+  }
+  arg_iterator filtered_end() const {
+    return arg_iterator(Args.end(), *this);
+  }
+
+  /// @}
+  /// @name Arg Removal
+  /// @{
+
+  /// eraseArg - Remove any option matching \p Id.
+  void eraseArg(OptSpecifier Id);
+
+  /// @}
+  /// @name Arg Access
+  /// @{
+
+  /// hasArg - Does the arg list contain any option matching \p Id.
+  ///
+  /// \p Claim Whether the argument should be claimed, if it exists.
+  bool hasArgNoClaim(OptSpecifier Id) const {
+    return getLastArgNoClaim(Id) != 0;
+  }
+  bool hasArg(OptSpecifier Id) const {
+    return getLastArg(Id) != 0;
+  }
+  bool hasArg(OptSpecifier Id0, OptSpecifier Id1) const {
+    return getLastArg(Id0, Id1) != 0;
+  }
+  bool hasArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const {
+    return getLastArg(Id0, Id1, Id2) != 0;
+  }
+
+  /// getLastArg - Return the last argument matching \p Id, or null.
+  ///
+  /// \p Claim Whether the argument should be claimed, if it exists.
+  Arg *getLastArgNoClaim(OptSpecifier Id) const;
+  Arg *getLastArg(OptSpecifier Id) const;
+  Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1) const;
+  Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const;
+  Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2,
+                  OptSpecifier Id3) const;
+  Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2,
+                  OptSpecifier Id3, OptSpecifier Id4) const;
+  Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2,
+                  OptSpecifier Id3, OptSpecifier Id4, OptSpecifier Id5) const;
+  Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2,
+                  OptSpecifier Id3, OptSpecifier Id4, OptSpecifier Id5,
+                  OptSpecifier Id6) const;
+  Arg *getLastArg(OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2,
+                  OptSpecifier Id3, OptSpecifier Id4, OptSpecifier Id5,
+                  OptSpecifier Id6, OptSpecifier Id7) const;
+
+  /// getArgString - Return the input argument string at \p Index.
+  virtual const char *getArgString(unsigned Index) const = 0;
+
+  /// getNumInputArgStrings - Return the number of original argument strings,
+  /// which are guaranteed to be the first strings in the argument string
+  /// list.
+  virtual unsigned getNumInputArgStrings() const = 0;
+
+  /// @}
+  /// @name Argument Lookup Utilities
+  /// @{
+
+  /// getLastArgValue - Return the value of the last argument, or a default.
+  StringRef getLastArgValue(OptSpecifier Id,
+                                  StringRef Default = "") const;
+
+  /// getAllArgValues - Get the values of all instances of the given argument
+  /// as strings.
+  std::vector<std::string> getAllArgValues(OptSpecifier Id) const;
+
+  /// @}
+  /// @name Translation Utilities
+  /// @{
+
+  /// hasFlag - Given an option \p Pos and its negative form \p Neg, return
+  /// true if the option is present, false if the negation is present, and
+  /// \p Default if neither option is given. If both the option and its
+  /// negation are present, the last one wins.
+  bool hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default=true) const;
+
+  /// AddLastArg - Render only the last argument match \p Id0, if present.
+  void AddLastArg(ArgStringList &Output, OptSpecifier Id0) const;
+
+  /// AddAllArgs - Render all arguments matching the given ids.
+  void AddAllArgs(ArgStringList &Output, OptSpecifier Id0,
+                  OptSpecifier Id1 = 0U, OptSpecifier Id2 = 0U) const;
+
+  /// AddAllArgValues - Render the argument values of all arguments
+  /// matching the given ids.
+  void AddAllArgValues(ArgStringList &Output, OptSpecifier Id0,
+                        OptSpecifier Id1 = 0U, OptSpecifier Id2 = 0U) const;
+
+  /// AddAllArgsTranslated - Render all the arguments matching the
+  /// given ids, but forced to separate args and using the provided
+  /// name instead of the first option value.
+  ///
+  /// \param Joined - If true, render the argument as joined with
+  /// the option specifier.
+  void AddAllArgsTranslated(ArgStringList &Output, OptSpecifier Id0,
+                            const char *Translation,
+                            bool Joined = false) const;
+
+  /// ClaimAllArgs - Claim all arguments which match the given
+  /// option id.
+  void ClaimAllArgs(OptSpecifier Id0) const;
+
+  /// ClaimAllArgs - Claim all arguments.
+  ///
+  void ClaimAllArgs() const;
+
+  /// @}
+  /// @name Arg Synthesis
+  /// @{
+
+  /// MakeArgString - Construct a constant string pointer whose
+  /// lifetime will match that of the ArgList.
+  virtual const char *MakeArgString(StringRef Str) const = 0;
+  const char *MakeArgString(const char *Str) const {
+    return MakeArgString(StringRef(Str));
+  }
+  const char *MakeArgString(std::string Str) const {
+    return MakeArgString(StringRef(Str));
+  }
+  const char *MakeArgString(const Twine &Str) const;
+
+  /// \brief Create an arg string for (\p LHS + \p RHS), reusing the
+  /// string at \p Index if possible.
+  const char *GetOrMakeJoinedArgString(unsigned Index, StringRef LHS,
+                                        StringRef RHS) const;
+
+  /// @}
+};
+
+class InputArgList : public ArgList  {
+private:
+  /// List of argument strings used by the contained Args.
+  ///
+  /// This is mutable since we treat the ArgList as being the list
+  /// of Args, and allow routines to add new strings (to have a
+  /// convenient place to store the memory) via MakeIndex.
+  mutable ArgStringList ArgStrings;
+
+  /// Strings for synthesized arguments.
+  ///
+  /// This is mutable since we treat the ArgList as being the list
+  /// of Args, and allow routines to add new strings (to have a
+  /// convenient place to store the memory) via MakeIndex.
+  mutable std::list<std::string> SynthesizedStrings;
+
+  /// The number of original input argument strings.
+  unsigned NumInputArgStrings;
+
+public:
+  InputArgList(const char* const *ArgBegin, const char* const *ArgEnd);
+  ~InputArgList();
+
+  virtual const char *getArgString(unsigned Index) const {
+    return ArgStrings[Index];
+  }
+
+  virtual unsigned getNumInputArgStrings() const {
+    return NumInputArgStrings;
+  }
+
+  /// @name Arg Synthesis
+  /// @{
+
+public:
+  /// MakeIndex - Get an index for the given string(s).
+  unsigned MakeIndex(StringRef String0) const;
+  unsigned MakeIndex(StringRef String0, StringRef String1) const;
+
+  virtual const char *MakeArgString(StringRef Str) const;
+
+  /// @}
+};
+
+/// DerivedArgList - An ordered collection of driver arguments,
+/// whose storage may be in another argument list.
+class DerivedArgList : public ArgList {
+  const InputArgList &BaseArgs;
+
+  /// The list of arguments we synthesized.
+  mutable arglist_type SynthesizedArgs;
+
+public:
+  /// Construct a new derived arg list from \p BaseArgs.
+  DerivedArgList(const InputArgList &BaseArgs);
+  ~DerivedArgList();
+
+  virtual const char *getArgString(unsigned Index) const {
+    return BaseArgs.getArgString(Index);
+  }
+
+  virtual unsigned getNumInputArgStrings() const {
+    return BaseArgs.getNumInputArgStrings();
+  }
+
+  const InputArgList &getBaseArgs() const {
+    return BaseArgs;
+  }
+
+  /// @name Arg Synthesis
+  /// @{
+
+  /// AddSynthesizedArg - Add a argument to the list of synthesized arguments
+  /// (to be freed).
+  void AddSynthesizedArg(Arg *A) {
+    SynthesizedArgs.push_back(A);
+  }
+
+  virtual const char *MakeArgString(StringRef Str) const;
+
+  /// AddFlagArg - Construct a new FlagArg for the given option \p Id and
+  /// append it to the argument list.
+  void AddFlagArg(const Arg *BaseArg, const Option Opt) {
+    append(MakeFlagArg(BaseArg, Opt));
+  }
+
+  /// AddPositionalArg - Construct a new Positional arg for the given option
+  /// \p Id, with the provided \p Value and append it to the argument
+  /// list.
+  void AddPositionalArg(const Arg *BaseArg, const Option Opt,
+                        StringRef Value) {
+    append(MakePositionalArg(BaseArg, Opt, Value));
+  }
+
+
+  /// AddSeparateArg - Construct a new Positional arg for the given option
+  /// \p Id, with the provided \p Value and append it to the argument
+  /// list.
+  void AddSeparateArg(const Arg *BaseArg, const Option Opt,
+                      StringRef Value) {
+    append(MakeSeparateArg(BaseArg, Opt, Value));
+  }
+
+
+  /// AddJoinedArg - Construct a new Positional arg for the given option
+  /// \p Id, with the provided \p Value and append it to the argument list.
+  void AddJoinedArg(const Arg *BaseArg, const Option Opt,
+                    StringRef Value) {
+    append(MakeJoinedArg(BaseArg, Opt, Value));
+  }
+
+
+  /// MakeFlagArg - Construct a new FlagArg for the given option \p Id.
+  Arg *MakeFlagArg(const Arg *BaseArg, const Option Opt) const;
+
+  /// MakePositionalArg - Construct a new Positional arg for the
+  /// given option \p Id, with the provided \p Value.
+  Arg *MakePositionalArg(const Arg *BaseArg, const Option Opt,
+                          StringRef Value) const;
+
+  /// MakeSeparateArg - Construct a new Positional arg for the
+  /// given option \p Id, with the provided \p Value.
+  Arg *MakeSeparateArg(const Arg *BaseArg, const Option Opt,
+                        StringRef Value) const;
+
+  /// MakeJoinedArg - Construct a new Positional arg for the
+  /// given option \p Id, with the provided \p Value.
+  Arg *MakeJoinedArg(const Arg *BaseArg, const Option Opt,
+                      StringRef Value) const;
+
+  /// @}
+};
+
+} // end namespace opt
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Option/OptParser.td b/include/llvm/Option/OptParser.td
new file mode 100644
index 0000000000..e781fa02d7
--- /dev/null
+++ b/include/llvm/Option/OptParser.td
@@ -0,0 +1,127 @@
+//===--- OptParser.td - Common Option Parsing Interfaces ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the common interfaces used by the option parsing TableGen
+//  backend.
+//
+//===----------------------------------------------------------------------===//
+
+// Define the kinds of options.
+
+class OptionKind<string name, int predecence = 0, bit sentinel = 0> {
+  string Name = name;
+  // The kind precedence, kinds with lower precedence are matched first.
+  int Precedence = predecence;
+  // Indicate a sentinel option.
+  bit Sentinel = sentinel;
+}
+
+// An option group.
+def KIND_GROUP : OptionKind<"Group">;
+// The input option kind.
+def KIND_INPUT : OptionKind<"Input", 1, 1>;
+// The unknown option kind.
+def KIND_UNKNOWN : OptionKind<"Unknown", 2, 1>;
+// A flag with no values.
+def KIND_FLAG : OptionKind<"Flag">;
+// An option which prefixes its (single) value.
+def KIND_JOINED : OptionKind<"Joined", 1>;
+// An option which is followed by its value.
+def KIND_SEPARATE : OptionKind<"Separate">;
+// An option followed by its values, which are separated by commas.
+def KIND_COMMAJOINED : OptionKind<"CommaJoined">;
+// An option which is which takes multiple (separate) arguments.
+def KIND_MULTIARG : OptionKind<"MultiArg">;
+// An option which is either joined to its (non-empty) value, or followed by its
+// value.
+def KIND_JOINED_OR_SEPARATE : OptionKind<"JoinedOrSeparate">;
+// An option which is both joined to its (first) value, and followed by its
+// (second) value.
+def KIND_JOINED_AND_SEPARATE : OptionKind<"JoinedAndSeparate">;
+
+// Define the option flags.
+
+class OptionFlag {}
+
+// HelpHidden - The option should not be displayed in --help, even if it has
+// help text. Clients *can* use this in conjunction with the OptTable::PrintHelp
+// arguments to implement hidden help groups.
+def HelpHidden : OptionFlag;
+
+// RenderAsInput - The option should not render the name when rendered as an
+// input (i.e., the option is rendered as values).
+def RenderAsInput : OptionFlag;
+
+// RenderJoined - The option should be rendered joined, even if separate (only
+// sensible on single value separate options).
+def RenderJoined : OptionFlag;
+
+// RenderSeparate - The option should be rendered separately, even if joined
+// (only sensible on joined options).
+def RenderSeparate : OptionFlag;
+
+// Define the option group class.
+
+class OptionGroup<string name> {
+  string EnumName = ?; // Uses the def name if undefined.
+  string Name = name;
+  string HelpText = ?;
+  OptionGroup Group = ?;
+}
+
+// Define the option class.
+
+class Option<list<string> prefixes, string name, OptionKind kind> {
+  string EnumName = ?; // Uses the def name if undefined.
+  list<string> Prefixes = prefixes;
+  string Name = name;
+  OptionKind Kind = kind;
+  // Used by MultiArg option kind.
+  int NumArgs = 0;
+  string HelpText = ?;
+  string MetaVarName = ?;
+  list<OptionFlag> Flags = [];
+  OptionGroup Group = ?;
+  Option Alias = ?;
+}
+
+// Helpers for defining options.
+
+class Flag<list<string> prefixes, string name>
+  : Option<prefixes, name, KIND_FLAG>;
+class Joined<list<string> prefixes, string name>
+  : Option<prefixes, name, KIND_JOINED>;
+class Separate<list<string> prefixes, string name>
+  : Option<prefixes, name, KIND_SEPARATE>;
+class CommaJoined<list<string> prefixes, string name>
+  : Option<prefixes, name, KIND_COMMAJOINED>;
+class MultiArg<list<string> prefixes, string name, int numargs>
+  : Option<prefixes, name, KIND_MULTIARG> {
+  int NumArgs = numargs;
+}
+class JoinedOrSeparate<list<string> prefixes, string name>
+  : Option<prefixes, name, KIND_JOINED_OR_SEPARATE>;
+class JoinedAndSeparate<list<string> prefixes, string name>
+  : Option<prefixes, name, KIND_JOINED_AND_SEPARATE>;
+
+// Mix-ins for adding optional attributes.
+
+class Alias<Option alias> { Option Alias = alias; }
+class EnumName<string name> { string EnumName = name; }
+class Flags<list<OptionFlag> flags> { list<OptionFlag> Flags = flags; }
+class Group<OptionGroup group> { OptionGroup Group = group; }
+class HelpText<string text> { string HelpText = text; }
+class MetaVarName<string name> { string MetaVarName = name; }
+
+// Predefined options.
+
+// FIXME: Have generator validate that these appear in correct position (and
+// aren't duplicated).
+def INPUT : Option<[], "<input>", KIND_INPUT>;
+def UNKNOWN : Option<[], "<unknown>", KIND_UNKNOWN>;
diff --git a/include/llvm/Option/OptSpecifier.h b/include/llvm/Option/OptSpecifier.h
new file mode 100644
index 0000000000..3bc9bb24ed
--- /dev/null
+++ b/include/llvm/Option/OptSpecifier.h
@@ -0,0 +1,39 @@
+//===--- OptSpecifier.h - Option Specifiers ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_OPTSPECIFIER_H
+#define LLVM_SUPPORT_OPTSPECIFIER_H
+
+namespace llvm {
+namespace opt {
+  class Option;
+
+  /// OptSpecifier - Wrapper class for abstracting references to option IDs.
+  class OptSpecifier {
+    unsigned ID;
+
+  private:
+    explicit OptSpecifier(bool); // DO NOT IMPLEMENT
+
+  public:
+    OptSpecifier() : ID(0) {}
+    /*implicit*/ OptSpecifier(unsigned _ID) : ID(_ID) {}
+    /*implicit*/ OptSpecifier(const Option *Opt);
+
+    bool isValid() const { return ID != 0; }
+
+    unsigned getID() const { return ID; }
+
+    bool operator==(OptSpecifier Opt) const { return ID == Opt.getID(); }
+    bool operator!=(OptSpecifier Opt) const { return !(*this == Opt); }
+  };
+}
+}
+
+#endif
diff --git a/include/llvm/Option/OptTable.h b/include/llvm/Option/OptTable.h
new file mode 100644
index 0000000000..930549fbd3
--- /dev/null
+++ b/include/llvm/Option/OptTable.h
@@ -0,0 +1,161 @@
+//===--- OptTable.h - Option Table ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_OPTTABLE_H
+#define LLVM_SUPPORT_OPTTABLE_H
+
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Option/OptSpecifier.h"
+
+namespace llvm {
+class raw_ostream;
+namespace opt {
+class Arg;
+class ArgList;
+class InputArgList;
+class Option;
+
+/// \brief Provide access to the Option info table.
+///
+/// The OptTable class provides a layer of indirection which allows Option
+/// instance to be created lazily. In the common case, only a few options will
+/// be needed at runtime; the OptTable class maintains enough information to
+/// parse command lines without instantiating Options, while letting other
+/// parts of the driver still use Option instances where convenient.
+class OptTable {
+public:
+  /// \brief Entry for a single option instance in the option data table.
+  struct Info {
+    /// A null terminated array of prefix strings to apply to name while
+    /// matching.
+    const char *const *Prefixes;
+    const char *Name;
+    const char *HelpText;
+    const char *MetaVar;
+    unsigned ID;
+    unsigned char Kind;
+    unsigned char Param;
+    unsigned short Flags;
+    unsigned short GroupID;
+    unsigned short AliasID;
+  };
+
+private:
+  /// \brief The static option information table.
+  const Info *OptionInfos;
+  unsigned NumOptionInfos;
+
+  unsigned TheInputOptionID;
+  unsigned TheUnknownOptionID;
+
+  /// The index of the first option which can be parsed (i.e., is not a
+  /// special option like 'input' or 'unknown', and is not an option group).
+  unsigned FirstSearchableIndex;
+
+  /// The union of all option prefixes. If an argument does not begin with
+  /// one of these, it is an input.
+  StringSet<> PrefixesUnion;
+  std::string PrefixChars;
+
+private:
+  const Info &getInfo(OptSpecifier Opt) const {
+    unsigned id = Opt.getID();
+    assert(id > 0 && id - 1 < getNumOptions() && "Invalid Option ID.");
+    return OptionInfos[id - 1];
+  }
+
+protected:
+  OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos);
+public:
+  ~OptTable();
+
+  /// \brief Return the total number of option classes.
+  unsigned getNumOptions() const { return NumOptionInfos; }
+
+  /// \brief Get the given Opt's Option instance, lazily creating it
+  /// if necessary.
+  ///
+  /// \return The option, or null for the INVALID option id.
+  const Option getOption(OptSpecifier Opt) const;
+
+  /// \brief Lookup the name of the given option.
+  const char *getOptionName(OptSpecifier id) const {
+    return getInfo(id).Name;
+  }
+
+  /// \brief Get the kind of the given option.
+  unsigned getOptionKind(OptSpecifier id) const {
+    return getInfo(id).Kind;
+  }
+
+  /// \brief Get the group id for the given option.
+  unsigned getOptionGroupID(OptSpecifier id) const {
+    return getInfo(id).GroupID;
+  }
+
+  /// \brief Should the help for the given option be hidden by default.
+  bool isOptionHelpHidden(OptSpecifier id) const;
+
+  /// \brief Get the help text to use to describe this option.
+  const char *getOptionHelpText(OptSpecifier id) const {
+    return getInfo(id).HelpText;
+  }
+
+  /// \brief Get the meta-variable name to use when describing
+  /// this options values in the help text.
+  const char *getOptionMetaVar(OptSpecifier id) const {
+    return getInfo(id).MetaVar;
+  }
+
+  /// \brief Parse a single argument; returning the new argument and
+  /// updating Index.
+  ///
+  /// \param [in,out] Index - The current parsing position in the argument
+  /// string list; on return this will be the index of the next argument
+  /// string to parse.
+  ///
+  /// \return The parsed argument, or 0 if the argument is missing values
+  /// (in which case Index still points at the conceptual next argument string
+  /// to parse).
+  Arg *ParseOneArg(const ArgList &Args, unsigned &Index) const;
+
+  /// \brief Parse an list of arguments into an InputArgList.
+  ///
+  /// The resulting InputArgList will reference the strings in [\p ArgBegin,
+  /// \p ArgEnd), and their lifetime should extend past that of the returned
+  /// InputArgList.
+  ///
+  /// The only error that can occur in this routine is if an argument is
+  /// missing values; in this case \p MissingArgCount will be non-zero.
+  ///
+  /// \param ArgBegin - The beginning of the argument vector.
+  /// \param ArgEnd - The end of the argument vector.
+  /// \param MissingArgIndex - On error, the index of the option which could
+  /// not be parsed.
+  /// \param MissingArgCount - On error, the number of missing options.
+  /// \return An InputArgList; on error this will contain all the options
+  /// which could be parsed.
+  InputArgList *ParseArgs(const char* const *ArgBegin,
+                          const char* const *ArgEnd,
+                          unsigned &MissingArgIndex,
+                          unsigned &MissingArgCount) const;
+
+  /// \brief Render the help text for an option table.
+  ///
+  /// \param OS - The stream to write the help text to.
+  /// \param Name - The name to use in the usage line.
+  /// \param Title - The title to use in the usage line.
+  /// \param ShowHidden - Whether help-hidden arguments should be shown.
+  void PrintHelp(raw_ostream &OS, const char *Name,
+                  const char *Title, bool ShowHidden = false) const;
+};
+} // end namespace opt
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Option/Option.h b/include/llvm/Option/Option.h
new file mode 100644
index 0000000000..d0e46112a3
--- /dev/null
+++ b/include/llvm/Option/Option.h
@@ -0,0 +1,193 @@
+//===--- Option.h - Abstract Driver Options ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_OPTION_H_
+#define LLVM_SUPPORT_OPTION_H_
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Option/OptTable.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+namespace opt {
+class Arg;
+class ArgList;
+/// ArgStringList - Type used for constructing argv lists for subprocesses.
+typedef SmallVector<const char*, 16> ArgStringList;
+
+/// Base flags for all options. Custom flags may be added after.
+enum DriverFlag {
+  HelpHidden       = (1 << 0),
+  RenderAsInput    = (1 << 1),
+  RenderJoined     = (1 << 2),
+  RenderSeparate   = (1 << 3)
+};
+
+/// Option - Abstract representation for a single form of driver
+/// argument.
+///
+/// An Option class represents a form of option that the driver
+/// takes, for example how many arguments the option has and how
+/// they can be provided. Individual option instances store
+/// additional information about what group the option is a member
+/// of (if any), if the option is an alias, and a number of
+/// flags. At runtime the driver parses the command line into
+/// concrete Arg instances, each of which corresponds to a
+/// particular Option instance.
+class Option {
+public:
+  enum OptionClass {
+    GroupClass = 0,
+    InputClass,
+    UnknownClass,
+    FlagClass,
+    JoinedClass,
+    SeparateClass,
+    CommaJoinedClass,
+    MultiArgClass,
+    JoinedOrSeparateClass,
+    JoinedAndSeparateClass
+  };
+
+  enum RenderStyleKind {
+    RenderCommaJoinedStyle,
+    RenderJoinedStyle,
+    RenderSeparateStyle,
+    RenderValuesStyle
+  };
+
+protected:
+  const OptTable::Info *Info;
+  const OptTable *Owner;
+
+public:
+  Option(const OptTable::Info *Info, const OptTable *Owner);
+  ~Option();
+
+  bool isValid() const {
+    return Info != 0;
+  }
+
+  unsigned getID() const {
+    assert(Info && "Must have a valid info!");
+    return Info->ID;
+  }
+
+  OptionClass getKind() const {
+    assert(Info && "Must have a valid info!");
+    return OptionClass(Info->Kind);
+  }
+
+  /// \brief Get the name of this option without any prefix.
+  StringRef getName() const {
+    assert(Info && "Must have a valid info!");
+    return Info->Name;
+  }
+
+  const Option getGroup() const {
+    assert(Info && "Must have a valid info!");
+    assert(Owner && "Must have a valid owner!");
+    return Owner->getOption(Info->GroupID);
+  }
+
+  const Option getAlias() const {
+    assert(Info && "Must have a valid info!");
+    assert(Owner && "Must have a valid owner!");
+    return Owner->getOption(Info->AliasID);
+  }
+
+  /// \brief Get the default prefix for this option.
+  StringRef getPrefix() const {
+    const char *Prefix = *Info->Prefixes;
+    return Prefix ? Prefix : StringRef();
+  }
+
+  /// \brief Get the name of this option with the default prefix.
+  std::string getPrefixedName() const {
+    std::string Ret = getPrefix();
+    Ret += getName();
+    return Ret;
+  }
+
+  unsigned getNumArgs() const { return Info->Param; }
+
+  bool hasNoOptAsInput() const { return Info->Flags & RenderAsInput;}
+
+  RenderStyleKind getRenderStyle() const {
+    if (Info->Flags & RenderJoined)
+      return RenderJoinedStyle;
+    if (Info->Flags & RenderSeparate)
+      return RenderSeparateStyle;
+    switch (getKind()) {
+    case GroupClass:
+    case InputClass:
+    case UnknownClass:
+      return RenderValuesStyle;
+    case JoinedClass:
+    case JoinedAndSeparateClass:
+      return RenderJoinedStyle;
+    case CommaJoinedClass:
+      return RenderCommaJoinedStyle;
+    case FlagClass:
+    case SeparateClass:
+    case MultiArgClass:
+    case JoinedOrSeparateClass:
+      return RenderSeparateStyle;
+    }
+    llvm_unreachable("Unexpected kind!");
+  }
+
+  /// Test if this option has the flag \a Val.
+  bool hasFlag(unsigned Val) const {
+    return Info->Flags & Val;
+  }
+
+  /// getUnaliasedOption - Return the final option this option
+  /// aliases (itself, if the option has no alias).
+  const Option getUnaliasedOption() const {
+    const Option Alias = getAlias();
+    if (Alias.isValid()) return Alias.getUnaliasedOption();
+    return *this;
+  }
+
+  /// getRenderName - Return the name to use when rendering this
+  /// option.
+  StringRef getRenderName() const {
+    return getUnaliasedOption().getName();
+  }
+
+  /// matches - Predicate for whether this option is part of the
+  /// given option (which may be a group).
+  ///
+  /// Note that matches against options which are an alias should never be
+  /// done -- aliases do not participate in matching and so such a query will
+  /// always be false.
+  bool matches(OptSpecifier ID) const;
+
+  /// accept - Potentially accept the current argument, returning a
+  /// new Arg instance, or 0 if the option does not accept this
+  /// argument (or the argument is missing values).
+  ///
+  /// If the option accepts the current argument, accept() sets
+  /// Index to the position where argument parsing should resume
+  /// (even if the argument is missing values).
+  ///
+  /// \parm ArgSize The number of bytes taken up by the matched Option prefix
+  ///               and name. This is used to determine where joined values
+  ///               start.
+  Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const;
+
+  void dump() const;
+};
+
+} // end namespace opt
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index 7b6f169666..35ec022516 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -104,6 +104,16 @@ public:
     return PassID;
   }
 
+  /// doInitialization - Virtual method overridden by subclasses to do
+  /// any necessary initialization before any pass is run.
+  ///
+  virtual bool doInitialization(Module &)  { return false; }
+
+  /// doFinalization - Virtual method overriden by subclasses to do any
+  /// necessary clean up after all passes have run.
+  ///
+  virtual bool doFinalization(Module &) { return false; }
+
   /// print - Print out the internal state of the pass.  This is called by
   /// Analyze to print out the contents of an analysis.  Otherwise it is not
   /// necessary to implement this method.  Beware that the module pointer MAY be
@@ -227,20 +237,10 @@ public:
   /// createPrinterPass - Get a module printer pass.
   Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
 
-  /// doInitialization - Virtual method overridden by subclasses to do
-  /// any necessary initialization.
-  ///
-  virtual bool doInitialization()  { return false; }
-
   /// runOnModule - Virtual method overriden by subclasses to process the module
   /// being operated on.
   virtual bool runOnModule(Module &M) = 0;
 
-  /// doFinalization - Virtual method overriden by subclasses to do any post
-  /// processing needed after all passes have run.
-  ///
-  virtual bool doFinalization() { return false; }
-
   virtual void assignPassManager(PMStack &PMS,
                                  PassManagerType T);
 
@@ -297,21 +297,11 @@ public:
   /// createPrinterPass - Get a function printer pass.
   Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
 
-  /// doInitialization - Virtual method overridden by subclasses to do
-  /// any necessary per-module initialization.
-  ///
-  virtual bool doInitialization(Module &);
-
   /// runOnFunction - Virtual method overriden by subclasses to do the
   /// per-function processing of the pass.
   ///
   virtual bool runOnFunction(Function &F) = 0;
 
-  /// doFinalization - Virtual method overriden by subclasses to do any post
-  /// processing needed after all passes have run.
-  ///
-  virtual bool doFinalization(Module &);
-
   virtual void assignPassManager(PMStack &PMS,
                                  PassManagerType T);
 
@@ -338,10 +328,8 @@ public:
   /// createPrinterPass - Get a basic block printer pass.
   Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
 
-  /// doInitialization - Virtual method overridden by subclasses to do
-  /// any necessary per-module initialization.
-  ///
-  virtual bool doInitialization(Module &);
+  using llvm::Pass::doInitialization;
+  using llvm::Pass::doFinalization;
 
   /// doInitialization - Virtual method overridden by BasicBlockPass subclasses
   /// to do any necessary per-function initialization.
@@ -358,11 +346,6 @@ public:
   ///
   virtual bool doFinalization(Function &);
 
-  /// doFinalization - Virtual method overriden by subclasses to do any post
-  /// processing needed after all passes have run.
-  ///
-  virtual bool doFinalization(Module &);
-
   virtual void assignPassManager(PMStack &PMS,
                                  PassManagerType T);
 
diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h
index d14d73b1b1..1cc574134f 100644
--- a/include/llvm/PassAnalysisSupport.h
+++ b/include/llvm/PassAnalysisSupport.h
@@ -19,9 +19,9 @@
 #ifndef LLVM_PASS_ANALYSIS_SUPPORT_H
 #define LLVM_PASS_ANALYSIS_SUPPORT_H
 
-#include "llvm/Pass.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Pass.h"
 #include <vector>
 
 namespace llvm {
diff --git a/include/llvm/PassManager.h b/include/llvm/PassManager.h
index 1d5e800b4d..ce5fda79f9 100644
--- a/include/llvm/PassManager.h
+++ b/include/llvm/PassManager.h
@@ -58,14 +58,6 @@ public:
   /// whether any of the passes modifies the module, and if so, return true.
   bool run(Module &M);
 
-  /// doInitialization - Run all of the initializers for the module passes.
-  ///
-  bool doInitialization();
-
-  /// doFinalization - Run all of the finalizers for the module passes.
-  ///
-  bool doFinalization();
-
 private:
   /// PassManagerImpl_New is the actual class. PassManager is just the
   /// wraper to publish simple pass manager interface
diff --git a/include/llvm/PassManagers.h b/include/llvm/PassManagers.h
index b0450f3e00..fac792824f 100644
--- a/include/llvm/PassManagers.h
+++ b/include/llvm/PassManagers.h
@@ -14,13 +14,13 @@
 #ifndef LLVM_PASSMANAGERS_H
 #define LLVM_PASSMANAGERS_H
 
-#include "llvm/Pass.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/DenseMap.h"
-#include <vector>
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Pass.h"
 #include <map>
+#include <vector>
 
 //===----------------------------------------------------------------------===//
 // Overview:
diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h
index c6ad44f5f4..81b3ce153c 100644
--- a/include/llvm/PassSupport.h
+++ b/include/llvm/PassSupport.h
@@ -22,8 +22,8 @@
 #define LLVM_PASS_SUPPORT_H
 
 #include "Pass.h"
-#include "llvm/PassRegistry.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/PassRegistry.h"
 #include "llvm/Support/Atomic.h"
 #include "llvm/Support/Valgrind.h"
 #include <vector>
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index a644b13366..3243fd9cea 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -15,12 +15,12 @@
 #define LLVM_SUPPORT_ALLOCATOR_H
 
 #include "llvm/Support/AlignOf.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
 #include <algorithm>
 #include <cassert>
-#include <cstdlib>
 #include <cstddef>
+#include <cstdlib>
 
 namespace llvm {
 template <typename T> struct ReferenceAdder { typedef T& result; };
diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h
index ad8d6d41fc..7a0b8db9bb 100644
--- a/include/llvm/Support/CallSite.h
+++ b/include/llvm/Support/CallSite.h
@@ -26,8 +26,8 @@
 #ifndef LLVM_SUPPORT_CALLSITE_H
 #define LLVM_SUPPORT_CALLSITE_H
 
-#include "llvm/Attributes.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Attributes.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Instructions.h"
@@ -177,10 +177,10 @@ public:
 
   /// getAttributes/setAttributes - get or set the parameter attributes of
   /// the call.
-  const AttrListPtr &getAttributes() const {
+  const AttributeSet &getAttributes() const {
     CALLSITE_DELEGATE_GETTER(getAttributes());
   }
-  void setAttributes(const AttrListPtr &PAL) {
+  void setAttributes(const AttributeSet &PAL) {
     CALLSITE_DELEGATE_SETTER(setAttributes(PAL));
   }
 
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index 872c57998c..0ab39fca38 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -20,10 +20,10 @@
 #ifndef LLVM_SUPPORT_COMMANDLINE_H
 #define LLVM_SUPPORT_COMMANDLINE_H
 
-#include "llvm/Support/type_traits.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/type_traits.h"
 #include <cassert>
 #include <climits>
 #include <cstdarg>
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index 5acc7160ab..fce30e8b7d 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -19,25 +19,47 @@
 # define __has_feature(x) 0
 #endif
 
-/// LLVM_HAS_RVALUE_REFERENCES - Does the compiler provide r-value references?
+/// \brief Does the compiler support r-value references?
 /// This implies that <utility> provides the one-argument std::move;  it
 /// does not imply the existence of any other C++ library features.
 #if (__has_feature(cxx_rvalue_references)   \
      || defined(__GXX_EXPERIMENTAL_CXX0X__) \
      || (defined(_MSC_VER) && _MSC_VER >= 1600))
-#define LLVM_USE_RVALUE_REFERENCES 1
+#define LLVM_HAS_RVALUE_REFERENCES 1
 #else
-#define LLVM_USE_RVALUE_REFERENCES 0
+#define LLVM_HAS_RVALUE_REFERENCES 0
+#endif
+
+/// \brief Does the compiler support r-value reference *this?
+///
+/// Sadly, this is separate from just r-value reference support because GCC
+/// implemented everything but this thus far. No release of GCC yet has support
+/// for this feature so it is enabled with Clang only.
+/// FIXME: This should change to a version check when GCC grows support for it.
+#if __has_feature(cxx_rvalue_references)
+#define LLVM_HAS_RVALUE_REFERENCE_THIS 1
+#else
+#define LLVM_HAS_RVALUE_REFERENCE_THIS 0
 #endif
 
 /// llvm_move - Expands to ::std::move if the compiler supports
 /// r-value references; otherwise, expands to the argument.
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
 #define llvm_move(value) (::std::move(value))
 #else
 #define llvm_move(value) (value)
 #endif
 
+/// Expands to '&' if r-value references are supported.
+///
+/// This can be used to provide l-value/r-value overrides of member functions.
+/// The r-value override should be guarded by LLVM_HAS_RVALUE_REFERENCE_THIS
+#if LLVM_HAS_RVALUE_REFERENCE_THIS
+#define LLVM_LVALUE_FUNCTION &
+#else
+#define LLVM_LVALUE_FUNCTION
+#endif
+
 /// LLVM_DELETED_FUNCTION - Expands to = delete if the compiler supports it.
 /// Use to mark functions as uncallable. Member functions with this should
 /// be declared private so that some behavior is kept in C++03 mode.
diff --git a/include/llvm/Support/DataFlow.h b/include/llvm/Support/DataFlow.h
index 355c402f54..cc3ff0da5c 100644
--- a/include/llvm/Support/DataFlow.h
+++ b/include/llvm/Support/DataFlow.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_SUPPORT_DATAFLOW_H
 #define LLVM_SUPPORT_DATAFLOW_H
 
-#include "llvm/User.h"
 #include "llvm/ADT/GraphTraits.h"
+#include "llvm/User.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index 0b60150168..54df4ebabc 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -473,8 +473,13 @@ enum {
   R_PPC64_ADDR16_HIGHER       = 39,
   R_PPC64_ADDR16_HIGHEST      = 41,
   R_PPC64_TOC16               = 47,
+  R_PPC64_TOC16_LO            = 48,
+  R_PPC64_TOC16_HA            = 50,
   R_PPC64_TOC                 = 51,
-  R_PPC64_TOC16_DS            = 63
+  R_PPC64_TOC16_DS            = 63,
+  R_PPC64_TOC16_LO_DS         = 64,
+  R_PPC64_TLS                 = 67,
+  R_PPC64_GOT_TPREL16_DS      = 87
 };
 
 // ARM Specific e_flags
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
index 95b01095c1..ca5dec0173 100644
--- a/include/llvm/Support/ErrorHandling.h
+++ b/include/llvm/Support/ErrorHandling.h
@@ -15,8 +15,8 @@
 #ifndef LLVM_SUPPORT_ERRORHANDLING_H
 #define LLVM_SUPPORT_ERRORHANDLING_H
 
-#include "llvm/Support/Compiler.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
 #include <string>
 
 namespace llvm {
diff --git a/include/llvm/Support/FileOutputBuffer.h b/include/llvm/Support/FileOutputBuffer.h
index bcd35e3c1e..cbc9c467d2 100644
--- a/include/llvm/Support/FileOutputBuffer.h
+++ b/include/llvm/Support/FileOutputBuffer.h
@@ -14,85 +14,79 @@
 #ifndef LLVM_SUPPORT_FILEOUTPUTBUFFER_H
 #define LLVM_SUPPORT_FILEOUTPUTBUFFER_H
 
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/FileSystem.h"
 
 namespace llvm {
-
 class error_code;
-template<class T> class OwningPtr;
 
 /// FileOutputBuffer - This interface provides simple way to create an in-memory
-/// buffer which will be written to a file. During the lifetime of these 
+/// buffer which will be written to a file. During the lifetime of these
 /// objects, the content or existence of the specified file is undefined. That
 /// is, creating an OutputBuffer for a file may immediately remove the file.
-/// If the FileOutputBuffer is committed, the target file's content will become 
-/// the buffer content at the time of the commit.  If the FileOutputBuffer is  
+/// If the FileOutputBuffer is committed, the target file's content will become
+/// the buffer content at the time of the commit.  If the FileOutputBuffer is
 /// not committed, the file will be deleted in the FileOutputBuffer destructor.
 class FileOutputBuffer {
 public:
 
   enum  {
     F_executable = 1  /// set the 'x' bit on the resulting file
-  }; 
+  };
 
   /// Factory method to create an OutputBuffer object which manages a read/write
   /// buffer of the specified size. When committed, the buffer will be written
-  /// to the file at the specified path.  
-  static error_code create(StringRef FilePath, size_t Size, 
-                           OwningPtr<FileOutputBuffer> &Result, 
-                           unsigned Flags=0);
-  
+  /// to the file at the specified path.
+  static error_code create(StringRef FilePath, size_t Size,
+                           OwningPtr<FileOutputBuffer> &Result,
+                           unsigned Flags = 0);
 
   /// Returns a pointer to the start of the buffer.
-  uint8_t *getBufferStart() const {
-    return BufferStart;
+  uint8_t *getBufferStart() {
+    return (uint8_t*)Region->data();
   }
-  
+
   /// Returns a pointer to the end of the buffer.
-  uint8_t *getBufferEnd() const {
-    return BufferEnd;
+  uint8_t *getBufferEnd() {
+    return (uint8_t*)Region->data() + Region->size();
   }
-  
+
   /// Returns size of the buffer.
   size_t getBufferSize() const {
-    return BufferEnd - BufferStart;
+    return Region->size();
   }
-  
+
   /// Returns path where file will show up if buffer is committed.
   StringRef getPath() const {
     return FinalPath;
   }
-    
-  /// Flushes the content of the buffer to its file and deallocates the 
+
+  /// Flushes the content of the buffer to its file and deallocates the
   /// buffer.  If commit() is not called before this object's destructor
   /// is called, the file is deleted in the destructor. The optional parameter
   /// is used if it turns out you want the file size to be smaller than
   /// initially requested.
   error_code commit(int64_t NewSmallerSize = -1);
-  
+
   /// If this object was previously committed, the destructor just deletes
   /// this object.  If this object was not committed, the destructor
   /// deallocates the buffer and the target file is never written.
   ~FileOutputBuffer();
 
-  
 private:
   FileOutputBuffer(const FileOutputBuffer &) LLVM_DELETED_FUNCTION;
   FileOutputBuffer &operator=(const FileOutputBuffer &) LLVM_DELETED_FUNCTION;
-protected:
-  FileOutputBuffer(uint8_t *Start, uint8_t *End,
-                    StringRef Path, StringRef TempPath);
-    
-  uint8_t            *BufferStart;
-  uint8_t            *BufferEnd;
+
+  FileOutputBuffer(llvm::sys::fs::mapped_file_region *R,
+                   StringRef Path, StringRef TempPath);
+
+  OwningPtr<llvm::sys::fs::mapped_file_region> Region;
   SmallString<128>    FinalPath;
   SmallString<128>    TempPath;
 };
-
-
-
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h
index b455b28b81..f1cbe978c1 100644
--- a/include/llvm/Support/FileSystem.h
+++ b/include/llvm/Support/FileSystem.h
@@ -607,7 +607,7 @@ private:
 public:
   typedef char char_type;
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   mapped_file_region(mapped_file_region&&);
   mapped_file_region &operator =(mapped_file_region&&);
 #endif
diff --git a/include/llvm/Support/GetElementPtrTypeIterator.h b/include/llvm/Support/GetElementPtrTypeIterator.h
index 93dc41fbdc..d1519e479d 100644
--- a/include/llvm/Support/GetElementPtrTypeIterator.h
+++ b/include/llvm/Support/GetElementPtrTypeIterator.h
@@ -15,8 +15,8 @@
 #ifndef LLVM_SUPPORT_GETELEMENTPTRTYPE_H
 #define LLVM_SUPPORT_GETELEMENTPTRTYPE_H
 
-#include "llvm/User.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/User.h"
 
 namespace llvm {
   template<typename ItTy = User::const_op_iterator>
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index f178b0caa8..30cfe6124b 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -23,12 +23,12 @@
 #ifndef LLVM_SUPPORT_GRAPHWRITER_H
 #define LLVM_SUPPORT_GRAPHWRITER_H
 
-#include "llvm/Support/DOTGraphTraits.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/GraphTraits.h"
+#include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/Path.h"
-#include <vector>
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
+#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/Support/IntegersSubset.h b/include/llvm/Support/IntegersSubset.h
index 03039fd645..d6a3b2f541 100644
--- a/include/llvm/Support/IntegersSubset.h
+++ b/include/llvm/Support/IntegersSubset.h
@@ -18,11 +18,10 @@
 #ifndef CONSTANTRANGESSET_H_
 #define CONSTANTRANGESSET_H_
 
-#include <list>
-
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/LLVMContext.h"
+#include <list>
 
 namespace llvm {
 
diff --git a/include/llvm/Support/PassNameParser.h b/include/llvm/Support/PassNameParser.h
index a24a6f0c5e..bdfab390b3 100644
--- a/include/llvm/Support/PassNameParser.h
+++ b/include/llvm/Support/PassNameParser.h
@@ -23,8 +23,8 @@
 #ifndef LLVM_SUPPORT_PASS_NAME_PARSER_H
 #define LLVM_SUPPORT_PASS_NAME_PARSER_H
 
-#include "llvm/Pass.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h
index 221fa8b3eb..36b6db7a72 100644
--- a/include/llvm/Support/PatternMatch.h
+++ b/include/llvm/Support/PatternMatch.h
@@ -41,13 +41,13 @@ bool match(Val *V, const Pattern &P) {
   return const_cast<Pattern&>(P).match(V);
 }
 
-  
+
 template<typename SubPattern_t>
 struct OneUse_match {
   SubPattern_t SubPattern;
-  
+
   OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
-  
+
   template<typename OpTy>
   bool match(OpTy *V) {
     return V->hasOneUse() && SubPattern.match(V);
@@ -56,8 +56,8 @@ struct OneUse_match {
 
 template<typename T>
 inline OneUse_match<T> m_OneUse(const T &SubPattern) { return SubPattern; }
-  
-  
+
+
 template<typename Class>
 struct class_match {
   template<typename ITy>
@@ -74,7 +74,7 @@ inline class_match<ConstantInt> m_ConstantInt() {
 inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); }
 
 inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
-  
+
 struct match_zero {
   template<typename ITy>
   bool match(ITy *V) {
@@ -83,12 +83,12 @@ struct match_zero {
     return false;
   }
 };
-  
+
 /// m_Zero() - Match an arbitrary zero/null constant.  This includes
 /// zero_initializer for vectors and ConstantPointerNull for pointers.
 inline match_zero m_Zero() { return match_zero(); }
-  
-  
+
+
 struct apint_match {
   const APInt *&Res;
   apint_match(const APInt *&R) : Res(R) {}
@@ -114,12 +114,12 @@ struct apint_match {
     return false;
   }
 };
-  
+
 /// m_APInt - Match a ConstantInt or splatted ConstantVector, binding the
 /// specified pointer to the contained APInt.
 inline apint_match m_APInt(const APInt *&Res) { return Res; }
 
-  
+
 template<int64_t Val>
 struct constantint_match {
   template<typename ITy>
@@ -161,7 +161,7 @@ struct cst_pred_ty : public Predicate {
     return false;
   }
 };
-  
+
 /// api_pred_ty - This helper class is used to match scalar and vector constants
 /// that satisfy a specified predicate, and bind them to an APInt.
 template<typename Predicate>
@@ -175,7 +175,7 @@ struct api_pred_ty : public Predicate {
         Res = &CI->getValue();
         return true;
       }
-    
+
     // FIXME: remove.
     if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
       if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
@@ -183,7 +183,7 @@ struct api_pred_ty : public Predicate {
           Res = &CI->getValue();
           return true;
         }
-    
+
     if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(V))
       if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
         if (this->isValue(CI->getValue())) {
@@ -194,8 +194,8 @@ struct api_pred_ty : public Predicate {
     return false;
   }
 };
-  
-  
+
+
 struct is_one {
   bool isValue(const APInt &C) { return C == 1; }
 };
@@ -203,11 +203,11 @@ struct is_one {
 /// m_One() - Match an integer 1 or a vector with all elements equal to 1.
 inline cst_pred_ty<is_one> m_One() { return cst_pred_ty<is_one>(); }
 inline api_pred_ty<is_one> m_One(const APInt *&V) { return V; }
-    
+
 struct is_all_ones {
   bool isValue(const APInt &C) { return C.isAllOnesValue(); }
 };
-  
+
 /// m_AllOnes() - Match an integer or vector with all bits set to true.
 inline cst_pred_ty<is_all_ones> m_AllOnes() {return cst_pred_ty<is_all_ones>();}
 inline api_pred_ty<is_all_ones> m_AllOnes(const APInt *&V) { return V; }
@@ -269,7 +269,7 @@ inline specificval_ty m_Specific(const Value *V) { return V; }
 struct bind_const_intval_ty {
   uint64_t &VR;
   bind_const_intval_ty(uint64_t &V) : VR(V) {}
-  
+
   template<typename ITy>
   bool match(ITy *V) {
     if (ConstantInt *CV = dyn_cast<ConstantInt>(V))
@@ -284,7 +284,7 @@ struct bind_const_intval_ty {
 /// m_ConstantInt - Match a ConstantInt and bind to its value.  This does not
 /// match ConstantInts wider than 64-bits.
 inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
-  
+
 //===----------------------------------------------------------------------===//
 // Matchers for specific binary operators.
 //
@@ -583,7 +583,7 @@ inline CastClass_match<OpTy, Instruction::BitCast>
 m_BitCast(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::BitCast>(Op);
 }
-  
+
 /// m_PtrToInt
 template<typename OpTy>
 inline CastClass_match<OpTy, Instruction::PtrToInt>
@@ -611,7 +611,7 @@ inline CastClass_match<OpTy, Instruction::ZExt>
 m_ZExt(const OpTy &Op) {
   return CastClass_match<OpTy, Instruction::ZExt>(Op);
 }
-  
+
 
 //===----------------------------------------------------------------------===//
 // Matchers for unary operators
diff --git a/include/llvm/Support/PredIteratorCache.h b/include/llvm/Support/PredIteratorCache.h
index bb66a8ed58..c5fb780501 100644
--- a/include/llvm/Support/PredIteratorCache.h
+++ b/include/llvm/Support/PredIteratorCache.h
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/CFG.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CFG.h"
 
 #ifndef LLVM_SUPPORT_PREDITERATORCACHE_H
 #define LLVM_SUPPORT_PREDITERATORCACHE_H
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
index ffe09b19b6..82df2c67bd 100644
--- a/include/llvm/Support/Regex.h
+++ b/include/llvm/Support/Regex.h
@@ -7,7 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements a POSIX regular expression matcher.
+// This file implements a POSIX regular expression matcher.  Both Basic and
+// Extended POSIX regular expressions (ERE) are supported.  EREs were extended
+// to support backreferences in matches.
+// This implementation also supports matching strings with embedded NUL chars.
 //
 //===----------------------------------------------------------------------===//
 
@@ -33,12 +36,14 @@ namespace llvm {
       /// null string after any newline in the string in addition to its normal
       /// function, and the $ anchor matches the null string before any
       /// newline in the string in addition to its normal function.
-      Newline=2
+      Newline=2,
+      /// By default, the POSIX extended regular expression (ERE) syntax is
+      /// assumed. Pass this flag to turn on basic regular expressions (BRE)
+      /// instead.
+      BasicRegex=4
     };
 
-    /// Compiles the given POSIX Extended Regular Expression \p Regex.
-    /// This implementation supports regexes and matching strings with embedded
-    /// NUL characters.
+    /// Compiles the given regular expression \p Regex.
     Regex(StringRef Regex, unsigned Flags = NoFlags);
     ~Regex();
 
diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h
index bcf95f2f6e..bc832e0c9e 100644
--- a/include/llvm/Support/SourceMgr.h
+++ b/include/llvm/Support/SourceMgr.h
@@ -16,8 +16,8 @@
 #ifndef SUPPORT_SOURCEMGR_H
 #define SUPPORT_SOURCEMGR_H
 
-#include "llvm/Support/SMLoc.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/SMLoc.h"
 #include <string>
 
 namespace llvm {
diff --git a/include/llvm/Support/StreamableMemoryObject.h b/include/llvm/Support/StreamableMemoryObject.h
index a2b4bcb9aa..2e1163f2d8 100644
--- a/include/llvm/Support/StreamableMemoryObject.h
+++ b/include/llvm/Support/StreamableMemoryObject.h
@@ -13,8 +13,8 @@
 
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/DataStream.h"
+#include "llvm/Support/MemoryObject.h"
 #include <vector>
 
 namespace llvm {
diff --git a/include/llvm/Support/StringPool.h b/include/llvm/Support/StringPool.h
index de05e0b547..71adbc5342 100644
--- a/include/llvm/Support/StringPool.h
+++ b/include/llvm/Support/StringPool.h
@@ -30,8 +30,8 @@
 #define LLVM_SUPPORT_STRINGPOOL_H
 
 #include "llvm/ADT/StringMap.h"
-#include <new>
 #include <cassert>
+#include <new>
 
 namespace llvm {
 
diff --git a/include/llvm/Support/TargetFolder.h b/include/llvm/Support/TargetFolder.h
index 45f7816336..f2d83a4565 100644
--- a/include/llvm/Support/TargetFolder.h
+++ b/include/llvm/Support/TargetFolder.h
@@ -19,10 +19,10 @@
 #ifndef LLVM_SUPPORT_TARGETFOLDER_H
 #define LLVM_SUPPORT_TARGETFOLDER_H
 
-#include "llvm/Constants.h"
-#include "llvm/InstrTypes.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constants.h"
+#include "llvm/InstrTypes.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h
index ca58bfb0d7..701e6a9575 100644
--- a/include/llvm/Support/TargetRegistry.h
+++ b/include/llvm/Support/TargetRegistry.h
@@ -19,10 +19,10 @@
 #ifndef LLVM_SUPPORT_TARGETREGISTRY_H
 #define LLVM_SUPPORT_TARGETREGISTRY_H
 
-#include "llvm/Support/CodeGen.h"
 #include "llvm/ADT/Triple.h"
-#include <string>
+#include "llvm/Support/CodeGen.h"
 #include <cassert>
+#include <string>
 
 namespace llvm {
   class AsmPrinter;
diff --git a/include/llvm/Support/ThreadLocal.h b/include/llvm/Support/ThreadLocal.h
index 62ec90ad24..01f735c921 100644
--- a/include/llvm/Support/ThreadLocal.h
+++ b/include/llvm/Support/ThreadLocal.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_SYSTEM_THREAD_LOCAL_H
 #define LLVM_SYSTEM_THREAD_LOCAL_H
 
-#include "llvm/Support/Threading.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Threading.h"
 #include <cassert>
 
 namespace llvm {
diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h
index a7418827ca..789c05f44f 100644
--- a/include/llvm/Support/Timer.h
+++ b/include/llvm/Support/Timer.h
@@ -15,13 +15,13 @@
 #ifndef LLVM_SUPPORT_TIMER_H
 #define LLVM_SUPPORT_TIMER_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/DataTypes.h"
-#include "llvm/ADT/StringRef.h"
 #include <cassert>
 #include <string>
-#include <vector>
 #include <utility>
+#include <vector>
 
 namespace llvm {
 
diff --git a/include/llvm/Support/Valgrind.h b/include/llvm/Support/Valgrind.h
index e147647039..a1397db8eb 100644
--- a/include/llvm/Support/Valgrind.h
+++ b/include/llvm/Support/Valgrind.h
@@ -16,8 +16,8 @@
 #ifndef LLVM_SYSTEM_VALGRIND_H
 #define LLVM_SYSTEM_VALGRIND_H
 
-#include "llvm/Support/Compiler.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Compiler.h"
 #include <stddef.h>
 
 #if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG)
diff --git a/include/llvm/Support/Win64EH.h b/include/llvm/Support/Win64EH.h
index 8d74e10be0..164aca16bf 100644
--- a/include/llvm/Support/Win64EH.h
+++ b/include/llvm/Support/Win64EH.h
@@ -17,6 +17,7 @@
 #define LLVM_SUPPORT_WIN64EH_H
 
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Endian.h"
 
 namespace llvm {
 namespace Win64EH {
@@ -39,11 +40,17 @@ enum UnwindOpcodes {
 /// or part thereof.
 union UnwindCode {
   struct {
-    uint8_t codeOffset;
-    uint8_t unwindOp:4,
-            opInfo:4;
+    support::ulittle8_t CodeOffset;
+    support::ulittle8_t UnwindOpAndOpInfo;
   } u;
-  uint16_t frameOffset;
+  support::ulittle16_t FrameOffset;
+
+  uint8_t getUnwindOp() const {
+    return u.UnwindOpAndOpInfo & 0x0F;
+  }
+  uint8_t getOpInfo() const {
+    return (u.UnwindOpAndOpInfo >> 4) & 0x0F;
+  }
 };
 
 enum {
@@ -60,37 +67,65 @@ enum {
 
 /// RuntimeFunction - An entry in the table of functions with unwind info.
 struct RuntimeFunction {
-  uint64_t startAddress;
-  uint64_t endAddress;
-  uint64_t unwindInfoOffset;
+  support::ulittle32_t StartAddress;
+  support::ulittle32_t EndAddress;
+  support::ulittle32_t UnwindInfoOffset;
 };
 
 /// UnwindInfo - An entry in the exception table.
 struct UnwindInfo {
-  uint8_t version:3,
-          flags:5;
-  uint8_t prologSize;
-  uint8_t numCodes;
-  uint8_t frameRegister:4,
-          frameOffset:4;
-  UnwindCode unwindCodes[1];
+  support::ulittle8_t VersionAndFlags;
+  support::ulittle8_t PrologSize;
+  support::ulittle8_t NumCodes;
+  support::ulittle8_t FrameRegisterAndOffset;
+  UnwindCode UnwindCodes[1];
 
-  void *getLanguageSpecificData() {
-    return reinterpret_cast<void *>(&unwindCodes[(numCodes+1) & ~1]);
+  uint8_t getVersion() const {
+    return VersionAndFlags & 0x07;
   }
-  uint64_t getLanguageSpecificHandlerOffset() {
-    return *reinterpret_cast<uint64_t *>(getLanguageSpecificData());
+  uint8_t getFlags() const {
+    return (VersionAndFlags >> 3) & 0x1f;
   }
-  void setLanguageSpecificHandlerOffset(uint64_t offset) {
-    *reinterpret_cast<uint64_t *>(getLanguageSpecificData()) = offset;
+  uint8_t getFrameRegister() const {
+    return FrameRegisterAndOffset & 0x0f;
   }
-  RuntimeFunction *getChainedFunctionEntry() {
-    return reinterpret_cast<RuntimeFunction *>(getLanguageSpecificData());
+  uint8_t getFrameOffset() const {
+    return (FrameRegisterAndOffset >> 4) & 0x0f;
+  }
+
+  // The data after unwindCodes depends on flags.
+  // If UNW_ExceptionHandler or UNW_TerminateHandler is set then follows
+  // the address of the language-specific exception handler.
+  // If UNW_ChainInfo is set then follows a RuntimeFunction which defines
+  // the chained unwind info.
+  // For more information please see MSDN at:
+  // http://msdn.microsoft.com/en-us/library/ddssxxy8.aspx
+
+  /// \brief Return pointer to language specific data part of UnwindInfo.
+  void *getLanguageSpecificData() {
+    return reinterpret_cast<void *>(&UnwindCodes[(NumCodes+1) & ~1]);
   }
+
+  /// \brief Return image-relativ offset of language-specific exception handler.
+  uint32_t getLanguageSpecificHandlerOffset() {
+    return *reinterpret_cast<uint32_t *>(getLanguageSpecificData());
+  }
+
+  /// \brief Set image-relativ offset of language-specific exception handler.
+  void setLanguageSpecificHandlerOffset(uint32_t offset) {
+    *reinterpret_cast<uint32_t *>(getLanguageSpecificData()) = offset;
+  }
+
+  /// \brief Return pointer to exception-specific data.
   void *getExceptionData() {
-    return reinterpret_cast<void *>(reinterpret_cast<uint64_t *>(
+    return reinterpret_cast<void *>(reinterpret_cast<uint32_t *>(
                                                   getLanguageSpecificData())+1);
   }
+
+  /// \brief Return pointer to chained unwind info.
+  RuntimeFunction *getChainedFunctionEntry() {
+    return reinterpret_cast<RuntimeFunction *>(getLanguageSpecificData());
+  }
 };
 
 
diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h
index e3f4f00388..03cbe33882 100644
--- a/include/llvm/Support/YAMLParser.h
+++ b/include/llvm/Support/YAMLParser.h
@@ -43,7 +43,6 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/SMLoc.h"
-
 #include <limits>
 #include <utility>
 
diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
index 319298c132..647c47f27c 100644
--- a/include/llvm/TableGen/Record.h
+++ b/include/llvm/TableGen/Record.h
@@ -19,9 +19,9 @@
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
-#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include <map>
 
diff --git a/include/llvm/TableGen/StringMatcher.h b/include/llvm/TableGen/StringMatcher.h
index 1dadc76200..1b0572faa2 100644
--- a/include/llvm/TableGen/StringMatcher.h
+++ b/include/llvm/TableGen/StringMatcher.h
@@ -14,10 +14,10 @@
 #ifndef STRINGMATCHER_H
 #define STRINGMATCHER_H
 
-#include <vector>
+#include "llvm/ADT/StringRef.h"
 #include <string>
 #include <utility>
-#include "llvm/ADT/StringRef.h"
+#include <vector>
 
 namespace llvm {
   class raw_ostream;
diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h
index 7df3bfa473..6a35fdef7d 100644
--- a/include/llvm/Target/TargetFrameLowering.h
+++ b/include/llvm/Target/TargetFrameLowering.h
@@ -15,7 +15,6 @@
 #define LLVM_TARGET_TARGETFRAMELOWERING_H
 
 #include "llvm/CodeGen/MachineBasicBlock.h"
-
 #include <utility>
 #include <vector>
 
@@ -48,19 +47,19 @@ private:
   unsigned StackAlignment;
   unsigned TransientStackAlignment;
   int LocalAreaOffset;
-
+  bool StackRealignable;
+  
   // @LOCALMOD-BEGIN
   // TODO(pdox): Refactor this and upstream it, to get rid of the
   // assumption that StackSlotSize == PointerSize.
   unsigned StackSlotSize;
   // @LOCALMOD-END
 public:
-  TargetFrameLowering(StackDirection D,
-                      unsigned StackAl, int LAO,
-                      unsigned TransAl = 1,
+  TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO,
+                      unsigned TransAl = 1, bool StackReal = true,
                       unsigned SlotSize = 0) // @LOCALMOD
     : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
-      LocalAreaOffset(LAO), StackSlotSize(SlotSize) {}
+      LocalAreaOffset(LAO), StackRealignable(StackReal), StackSlotSize(SlotSize) {}
 
   virtual ~TargetFrameLowering();
 
@@ -90,6 +89,12 @@ public:
     return TransientStackAlignment;
   }
 
+  /// isStackRealignable - This method returns whether the stack can be
+  /// realigned.
+  bool isStackRealignable() const {
+    return StackRealignable;
+  }
+
   /// getOffsetOfLocalArea - This method returns the offset of the local area
   /// from the stack pointer on entrance to a function.
   ///
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index d2e06114d8..fb311d8508 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -15,9 +15,9 @@
 #define LLVM_TARGET_TARGETINSTRINFO_H
 
 #include "llvm/ADT/SmallSet.h"
-#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/CodeGen/DFAPacketizer.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCInstrInfo.h"
 
 namespace llvm {
 
@@ -143,9 +143,7 @@ public:
   /// missed.
   virtual bool hasLoadFromStackSlot(const MachineInstr *MI,
                                     const MachineMemOperand *&MMO,
-                                    int &FrameIndex) const {
-    return 0;
-  }
+                                    int &FrameIndex) const;
 
   /// isStoreToStackSlot - If the specified machine instruction is a direct
   /// store to a stack slot, return the virtual or physical register number of
@@ -173,9 +171,7 @@ public:
   /// stack.  This is just a hint, as some cases may be missed.
   virtual bool hasStoreToStackSlot(const MachineInstr *MI,
                                    const MachineMemOperand *&MMO,
-                                   int &FrameIndex) const {
-    return 0;
-  }
+                                   int &FrameIndex) const;
 
   /// reMaterialize - Re-issue the specified 'original' instruction at the
   /// specific location targeting a new destination register.
@@ -186,7 +182,7 @@ public:
                              MachineBasicBlock::iterator MI,
                              unsigned DestReg, unsigned SubIdx,
                              const MachineInstr *Orig,
-                             const TargetRegisterInfo &TRI) const = 0;
+                             const TargetRegisterInfo &TRI) const;
 
   /// duplicate - Create a duplicate of the Orig instruction in MF. This is like
   /// MachineFunction::CloneMachineInstr(), but the target may update operands
@@ -194,7 +190,7 @@ public:
   ///
   /// The instruction must be duplicable as indicated by isNotDuplicable().
   virtual MachineInstr *duplicate(MachineInstr *Orig,
-                                  MachineFunction &MF) const = 0;
+                                  MachineFunction &MF) const;
 
   /// convertToThreeAddress - This method must be implemented by targets that
   /// set the M_CONVERTIBLE_TO_3_ADDR flag.  When this flag is set, the target
@@ -221,13 +217,13 @@ public:
   /// method for a non-commutable instruction, but there may be some cases
   /// where this method fails and returns null.
   virtual MachineInstr *commuteInstruction(MachineInstr *MI,
-                                           bool NewMI = false) const = 0;
+                                           bool NewMI = false) const;
 
   /// findCommutedOpIndices - If specified MI is commutable, return the two
   /// operand indices that would swap value. Return false if the instruction
   /// is not in a form which this routine understands.
   virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
-                                     unsigned &SrcOpIdx2) const = 0;
+                                     unsigned &SrcOpIdx2) const;
 
   /// produceSameValue - Return true if two machine instructions would produce
   /// identical values. By default, this is only true when the two instructions
@@ -236,7 +232,7 @@ public:
   /// aggressive checks.
   virtual bool produceSameValue(const MachineInstr *MI0,
                                 const MachineInstr *MI1,
-                                const MachineRegisterInfo *MRI = 0) const = 0;
+                                const MachineRegisterInfo *MRI = 0) const;
 
   /// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
   /// true if it cannot be understood (e.g. it's a switch dispatch or isn't
@@ -298,7 +294,7 @@ public:
   /// after it, replacing it with an unconditional branch to NewDest. This is
   /// used by the tail merging pass.
   virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
-                                       MachineBasicBlock *NewDest) const = 0;
+                                       MachineBasicBlock *NewDest) const;
 
   /// isLegalToSplitMBBAt - Return true if it's legal to split the given basic
   /// block at the specified instruction (i.e. instruction would be the start
@@ -569,7 +565,7 @@ public:
   /// folding is possible.
   virtual
   bool canFoldMemoryOperand(const MachineInstr *MI,
-                            const SmallVectorImpl<unsigned> &Ops) const =0;
+                            const SmallVectorImpl<unsigned> &Ops) const;
 
   /// unfoldMemoryOperand - Separate a single instruction which folded a load or
   /// a store or a load and a store into two or more instruction. If this is
@@ -669,13 +665,13 @@ public:
 
   /// isUnpredicatedTerminator - Returns true if the instruction is a
   /// terminator instruction that has not been predicated.
-  virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const = 0;
+  virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
 
   /// PredicateInstruction - Convert the instruction into a predicated
   /// instruction. It returns true if the operation was successful.
   virtual
   bool PredicateInstruction(MachineInstr *MI,
-                        const SmallVectorImpl<MachineOperand> &Pred) const = 0;
+                        const SmallVectorImpl<MachineOperand> &Pred) const;
 
   /// SubsumesPredicate - Returns true if the first specified predicate
   /// subsumes the second, e.g. GE subsumes GT.
@@ -711,7 +707,7 @@ public:
   /// terminators.
   virtual bool isSchedulingBoundary(const MachineInstr *MI,
                                     const MachineBasicBlock *MBB,
-                                    const MachineFunction &MF) const = 0;
+                                    const MachineFunction &MF) const;
 
   /// Measure the specified inline asm to determine an approximation of its
   /// length.
@@ -723,21 +719,25 @@ public:
   /// register allocation.
   virtual ScheduleHazardRecognizer*
   CreateTargetHazardRecognizer(const TargetMachine *TM,
-                               const ScheduleDAG *DAG) const = 0;
+                               const ScheduleDAG *DAG) const;
 
   /// CreateTargetMIHazardRecognizer - Allocate and return a hazard recognizer
   /// to use for this target when scheduling the machine instructions before
   /// register allocation.
   virtual ScheduleHazardRecognizer*
   CreateTargetMIHazardRecognizer(const InstrItineraryData*,
-                                 const ScheduleDAG *DAG) const = 0;
+                                 const ScheduleDAG *DAG) const;
 
   /// CreateTargetPostRAHazardRecognizer - Allocate and return a hazard
   /// recognizer to use for this target when scheduling the machine instructions
   /// after register allocation.
   virtual ScheduleHazardRecognizer*
   CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
-                                     const ScheduleDAG *DAG) const = 0;
+                                     const ScheduleDAG *DAG) const;
+
+  /// Provide a global flag for disabling the PreRA hazard recognizer that
+  /// targets may choose to honor.
+  bool usePreRAHazardRecognizer() const;
 
   /// analyzeCompare - For a comparison instruction, return the source registers
   /// in SrcReg and SrcReg2 if having two register operands, and the value it
@@ -785,7 +785,7 @@ public:
   /// IssueWidth is the number of microops that can be dispatched each
   /// cycle. An instruction with zero microops takes no dispatch resources.
   virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
-                                  const MachineInstr *MI) const = 0;
+                                  const MachineInstr *MI) const;
 
   /// isZeroCost - Return true for pseudo instructions that don't consume any
   /// machine resources in their current form. These are common cases that the
@@ -797,7 +797,7 @@ public:
 
   virtual int getOperandLatency(const InstrItineraryData *ItinData,
                                 SDNode *DefNode, unsigned DefIdx,
-                                SDNode *UseNode, unsigned UseIdx) const = 0;
+                                SDNode *UseNode, unsigned UseIdx) const;
 
   /// getOperandLatency - Compute and return the use operand latency of a given
   /// pair of def and use.
@@ -810,7 +810,7 @@ public:
   virtual int getOperandLatency(const InstrItineraryData *ItinData,
                                 const MachineInstr *DefMI, unsigned DefIdx,
                                 const MachineInstr *UseMI,
-                                unsigned UseIdx) const = 0;
+                                unsigned UseIdx) const;
 
   /// computeOperandLatency - Compute and return the latency of the given data
   /// dependent def and use when the operand indices are already known.
@@ -826,10 +826,10 @@ public:
   /// PredCost.
   virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,
                                    const MachineInstr *MI,
-                                   unsigned *PredCost = 0) const = 0;
+                                   unsigned *PredCost = 0) const;
 
   virtual int getInstrLatency(const InstrItineraryData *ItinData,
-                              SDNode *Node) const = 0;
+                              SDNode *Node) const;
 
   /// Return the default expected latency for a def based on it's opcode.
   unsigned defaultDefLatency(const MCSchedModel *SchedModel,
@@ -859,7 +859,7 @@ public:
   /// if the target considered it 'low'.
   virtual
   bool hasLowDefLatency(const InstrItineraryData *ItinData,
-                        const MachineInstr *DefMI, unsigned DefIdx) const = 0;
+                        const MachineInstr *DefMI, unsigned DefIdx) const;
 
   /// verifyInstruction - Perform target specific instruction verification.
   virtual
@@ -976,84 +976,6 @@ private:
   int CallFrameSetupOpcode, CallFrameDestroyOpcode;
 };
 
-/// TargetInstrInfoImpl - This is the default implementation of
-/// TargetInstrInfo, which just provides a couple of default implementations
-/// for various methods.  This separated out because it is implemented in
-/// libcodegen, not in libtarget.
-class TargetInstrInfoImpl : public TargetInstrInfo {
-protected:
-  TargetInstrInfoImpl(int CallFrameSetupOpcode = -1,
-                      int CallFrameDestroyOpcode = -1)
-    : TargetInstrInfo(CallFrameSetupOpcode, CallFrameDestroyOpcode) {}
-public:
-  virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
-                                       MachineBasicBlock *NewDest) const;
-  virtual MachineInstr *commuteInstruction(MachineInstr *MI,
-                                           bool NewMI = false) const;
-  virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
-                                     unsigned &SrcOpIdx2) const;
-  virtual bool canFoldMemoryOperand(const MachineInstr *MI,
-                                    const SmallVectorImpl<unsigned> &Ops) const;
-  virtual bool hasLoadFromStackSlot(const MachineInstr *MI,
-                                    const MachineMemOperand *&MMO,
-                                    int &FrameIndex) const;
-  virtual bool hasStoreToStackSlot(const MachineInstr *MI,
-                                   const MachineMemOperand *&MMO,
-                                   int &FrameIndex) const;
-  virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
-  virtual bool PredicateInstruction(MachineInstr *MI,
-                            const SmallVectorImpl<MachineOperand> &Pred) const;
-  virtual void reMaterialize(MachineBasicBlock &MBB,
-                             MachineBasicBlock::iterator MI,
-                             unsigned DestReg, unsigned SubReg,
-                             const MachineInstr *Orig,
-                             const TargetRegisterInfo &TRI) const;
-  virtual MachineInstr *duplicate(MachineInstr *Orig,
-                                  MachineFunction &MF) const;
-  virtual bool produceSameValue(const MachineInstr *MI0,
-                                const MachineInstr *MI1,
-                                const MachineRegisterInfo *MRI) const;
-  virtual bool isSchedulingBoundary(const MachineInstr *MI,
-                                    const MachineBasicBlock *MBB,
-                                    const MachineFunction &MF) const;
-
-  virtual int getOperandLatency(const InstrItineraryData *ItinData,
-                                SDNode *DefNode, unsigned DefIdx,
-                                SDNode *UseNode, unsigned UseIdx) const;
-
-  virtual int getInstrLatency(const InstrItineraryData *ItinData,
-                              SDNode *Node) const;
-
-  virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
-                                  const MachineInstr *MI) const;
-
-  virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,
-                                   const MachineInstr *MI,
-                                   unsigned *PredCost = 0) const;
-
-  virtual
-  bool hasLowDefLatency(const InstrItineraryData *ItinData,
-                        const MachineInstr *DefMI, unsigned DefIdx) const;
-
-  virtual int getOperandLatency(const InstrItineraryData *ItinData,
-                                const MachineInstr *DefMI, unsigned DefIdx,
-                                const MachineInstr *UseMI,
-                                unsigned UseIdx) const;
-
-  bool usePreRAHazardRecognizer() const;
-
-  virtual ScheduleHazardRecognizer *
-  CreateTargetHazardRecognizer(const TargetMachine*, const ScheduleDAG*) const;
-
-  virtual ScheduleHazardRecognizer *
-  CreateTargetMIHazardRecognizer(const InstrItineraryData*,
-                                 const ScheduleDAG*) const;
-
-  virtual ScheduleHazardRecognizer *
-  CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
-                                     const ScheduleDAG*) const;
-};
-
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Target/TargetJITInfo.h b/include/llvm/Target/TargetJITInfo.h
index c2bb376131..f46c956ea0 100644
--- a/include/llvm/Target/TargetJITInfo.h
+++ b/include/llvm/Target/TargetJITInfo.h
@@ -17,8 +17,8 @@
 #ifndef LLVM_TARGET_TARGETJITINFO_H
 #define LLVM_TARGET_TARGETJITINFO_H
 
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
 #include <cassert>
 
 namespace llvm {
diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h
index f1dd1f4bbe..ceeba4f6e9 100644
--- a/include/llvm/Target/TargetLibraryInfo.h
+++ b/include/llvm/Target/TargetLibraryInfo.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_TARGET_TARGETLIBRARYINFO_H
 #define LLVM_TARGET_TARGETLIBRARYINFO_H
 
-#include "llvm/Pass.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Pass.h"
 
 namespace llvm {
   class Triple;
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index f8925f25a1..42f771bf5e 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -22,14 +22,14 @@
 #ifndef LLVM_TARGET_TARGETLOWERING_H
 #define LLVM_TARGET_TARGETLOWERING_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/AddressingMode.h"
+#include "llvm/Attributes.h"
 #include "llvm/CallingConv.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/InlineAsm.h"
-#include "llvm/Attributes.h"
-#include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/Support/DebugLoc.h"
 #include "llvm/Target/TargetCallingConv.h"
 #include "llvm/Target/TargetMachine.h"
@@ -171,6 +171,11 @@ public:
 
   virtual bool isSelectSupported(SelectSupportKind kind) const { return true; }
 
+  /// shouldSplitVectorElementType - Return true if a vector of the given type
+  /// should be split (TypeSplitVector) instead of promoted
+  /// (TypePromoteInteger) during type legalization.
+  virtual bool shouldSplitVectorElementType(EVT VT) const { return false; }
+
   /// isIntDivCheap() - Return true if integer divide is usually cheaper than
   /// a sequence of several shifts, adds, and multiplies for this target.
   bool isIntDivCheap() const { return IntDivIsCheap; }
@@ -378,6 +383,16 @@ public:
     return false;
   }
 
+  /// isIntImmLegal - Returns true if the target can instruction select the
+  /// specified integer immediate natively (that is, it's materialized with one
+  /// instruction). The current *assumption* in isel is all of integer
+  /// immediates are "legal" and only the memcpy / memset expansion code is
+  /// making use of this. The rest of isel doesn't have proper cost model for
+  /// immediate materialization.
+  virtual bool isIntImmLegal(const APInt &/*Imm*/, EVT /*VT*/) const {
+    return true;
+  }
+
   /// isShuffleMaskLegal - Targets can use this to indicate that they only
   /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
   /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
@@ -685,12 +700,14 @@ public:
   }
 
   /// This function returns true if the target allows unaligned memory accesses.
-  /// of the specified type. This is used, for example, in situations where an
-  /// array copy/move/set is  converted to a sequence of store operations. It's
-  /// use helps to ensure that such replacements don't generate code that causes
-  /// an alignment error  (trap) on the target machine.
+  /// of the specified type. If true, it also returns whether the unaligned
+  /// memory access is "fast" in the second argument by reference. This is used,
+  /// for example, in situations where an array copy/move/set is  converted to a
+  /// sequence of store operations. It's use helps to ensure that such
+  /// replacements don't generate code that causes an alignment error  (trap) on
+  /// the target machine.
   /// @brief Determine if the target supports unaligned memory accesses.
-  virtual bool allowsUnalignedMemoryAccesses(EVT) const {
+  virtual bool allowsUnalignedMemoryAccesses(EVT, bool *Fast = 0) const {
     return false;
   }
 
@@ -1720,6 +1737,13 @@ public:
     return false;
   }
 
+  /// isZExtFree - Return true if zero-extending the specific node Val to type
+  /// VT2 is free (either because it's implicitly zero-extended such as ARM
+  /// ldrb / ldrh or because it's folded such as X86 zero-extending loads).
+  virtual bool isZExtFree(SDValue Val, EVT VT2) const {
+    return isZExtFree(Val.getValueType(), VT2);
+  }
+
   /// isFNegFree - Return true if an fneg operation is free to the point where
   /// it is never worthwhile to replace it with a bitwise operation.
   virtual bool isFNegFree(EVT) const {
@@ -1753,9 +1777,9 @@ public:
   SDValue BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
                          SelectionDAG &DAG) const;
   SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
-                      std::vector<SDNode*>* Created) const;
+                      std::vector<SDNode*> *Created) const;
   SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
-                      std::vector<SDNode*>* Created) const;
+                      std::vector<SDNode*> *Created) const;
 
 
   //===--------------------------------------------------------------------===//
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index fab63254d9..0df793dfd6 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -15,10 +15,10 @@
 #ifndef LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
 #define LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
 
-#include "llvm/Module.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/SectionKind.h"
-#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Module.h"
 
 namespace llvm {
   class MachineModuleInfo;
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 50066473b5..a891626c37 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -14,12 +14,12 @@
 #ifndef LLVM_TARGET_TARGETMACHINE_H
 #define LLVM_TARGET_TARGETMACHINE_H
 
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/TargetTransformInfo.h"
 #include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/ADT/StringRef.h"
+#include "llvm/TargetTransformInfo.h"
 #include <cassert>
 #include <string>
 
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 0a1b73e352..ec4f2558e6 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -24,7 +24,7 @@ namespace llvm {
   // Possible float ABI settings. Used with FloatABIType in TargetOptions.h.
   namespace FloatABI {
     enum ABIType {
-      Default, // Target-specific (either soft or hard depending on triple, etc).
+      Default, // Target-specific (either soft or hard depending on triple,etc).
       Soft, // Soft float.
       Hard  // Hard float.
     };
@@ -56,7 +56,7 @@ namespace llvm {
           GuaranteedTailCallOpt(false), DisableTailCalls(false),
           StackAlignmentOverride(0), RealignStack(true), EnableFastISel(false),
           PositionIndependentExecutable(false), EnableSegmentedStacks(false),
-          UseInitArray(false), TrapFuncName(""), FloatABIType(FloatABI::Default),
+          UseInitArray(false), TrapFuncName(""),FloatABIType(FloatABI::Default),
           AllowFPOpFusion(FPOpFusion::Standard)
     {}
 
@@ -208,10 +208,10 @@ namespace llvm {
     /// Strict mode - allow fusion only if/when it can be proven that the excess
     /// precision won't effect the result.
     ///
-    /// Note: This option only controls formation of fused ops by the optimizers.
-    /// Fused operations that are explicitly specified (e.g. FMA via the
-    /// llvm.fma.* intrinsic) will always be honored, regardless of the value of
-    /// this option.
+    /// Note: This option only controls formation of fused ops by the
+    /// optimizers.  Fused operations that are explicitly specified (e.g. FMA
+    /// via the llvm.fma.* intrinsic) will always be honored, regardless of
+    /// the value of this option.
     FPOpFusion::FPOpFusionMode AllowFPOpFusion;
 
   };
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index afa2ee2744..40a7505f67 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -16,11 +16,11 @@
 #ifndef LLVM_TARGET_TARGETREGISTERINFO_H
 #define LLVM_TARGET_TARGETREGISTERINFO_H
 
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include <cassert>
 #include <functional>
 
@@ -30,12 +30,13 @@ class BitVector;
 class MachineFunction;
 class RegScavenger;
 template<class T> class SmallVectorImpl;
+class VirtRegMap;
 class raw_ostream;
 
 class TargetRegisterClass {
 public:
-  typedef const uint16_t* iterator;
-  typedef const uint16_t* const_iterator;
+  typedef const MCPhysReg* iterator;
+  typedef const MCPhysReg* const_iterator;
   typedef const MVT::SimpleValueType* vt_iterator;
   typedef const TargetRegisterClass* const * sc_iterator;
 
@@ -45,7 +46,7 @@ public:
   const uint32_t *SubClassMask;
   const uint16_t *SuperRegIndices;
   const sc_iterator SuperClasses;
-  ArrayRef<uint16_t> (*OrderFunc)(const MachineFunction&);
+  ArrayRef<MCPhysReg> (*OrderFunc)(const MachineFunction&);
 
   /// getID() - Return the register class ID number.
   ///
@@ -190,7 +191,7 @@ public:
   ///
   /// By default, this method returns all registers in the class.
   ///
-  ArrayRef<uint16_t> getRawAllocationOrder(const MachineFunction &MF) const {
+  ArrayRef<MCPhysReg> getRawAllocationOrder(const MachineFunction &MF) const {
     return OrderFunc ? OrderFunc(MF) : makeArrayRef(begin(), getNumRegs());
   }
 };
@@ -407,7 +408,7 @@ public:
   /// order of desired callee-save stack frame offset. The first register is
   /// closest to the incoming stack pointer if stack grows down, and vice versa.
   ///
-  virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF = 0)
+  virtual const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF = 0)
                                                                       const = 0;
 
   /// getCallPreservedMask - Return a mask of call-preserved registers for the
@@ -594,10 +595,13 @@ public:
     return 0;
   }
 
-// Get the weight in units of pressure for this register class.
+  /// Get the weight in units of pressure for this register class.
   virtual const RegClassWeight &getRegClassWeight(
     const TargetRegisterClass *RC) const = 0;
 
+  /// Get the weight in units of pressure for this register unit.
+  virtual unsigned getRegUnitWeight(unsigned RegUnit) const = 0;
+
   /// Get the number of dimensions of register pressure.
   virtual unsigned getNumRegPressureSets() const = 0;
 
@@ -613,27 +617,29 @@ public:
   virtual const int *getRegClassPressureSets(
     const TargetRegisterClass *RC) const = 0;
 
-  /// getRawAllocationOrder - Returns the register allocation order for a
-  /// specified register class with a target-dependent hint. The returned list
-  /// may contain reserved registers that cannot be allocated.
-  ///
-  /// Register allocators need only call this function to resolve
-  /// target-dependent hints, but it should work without hinting as well.
-  virtual ArrayRef<uint16_t>
-  getRawAllocationOrder(const TargetRegisterClass *RC,
-                        unsigned HintType, unsigned HintReg,
-                        const MachineFunction &MF) const {
-    return RC->getRawAllocationOrder(MF);
-  }
-
-  /// ResolveRegAllocHint - Resolves the specified register allocation hint
-  /// to a physical register. Returns the physical register if it is successful.
-  virtual unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
-                                       const MachineFunction &MF) const {
-    if (Type == 0 && Reg && isPhysicalRegister(Reg))
-      return Reg;
-    return 0;
-  }
+  /// Get the dimensions of register pressure impacted by this register unit.
+  /// Returns a -1 terminated array of pressure set IDs.
+  virtual const int *getRegUnitPressureSets(unsigned RegUnit) const = 0;
+
+  /// Get a list of 'hint' registers that the register allocator should try
+  /// first when allocating a physical register for the virtual register
+  /// VirtReg. These registers are effectively moved to the front of the
+  /// allocation order.
+  ///
+  /// The Order argument is the allocation order for VirtReg's register class
+  /// as returned from RegisterClassInfo::getOrder(). The hint registers must
+  /// come from Order, and they must not be reserved.
+  ///
+  /// The default implementation of this function can resolve
+  /// target-independent hints provided to MRI::setRegAllocationHint with
+  /// HintType == 0. Targets that override this function should defer to the
+  /// default implementation if they have no reason to change the allocation
+  /// order for VirtReg. There may be target-independent hints.
+  virtual void getRegAllocationHints(unsigned VirtReg,
+                                     ArrayRef<MCPhysReg> Order,
+                                     SmallVectorImpl<MCPhysReg> &Hints,
+                                     const MachineFunction &MF,
+                                     const VirtRegMap *VRM = 0) const;
 
   /// avoidWriteAfterWrite - Return true if the register allocator should avoid
   /// writing a register from RC in two consecutive instructions.
@@ -876,7 +882,8 @@ class PrintReg {
   unsigned Reg;
   unsigned SubIdx;
 public:
-  PrintReg(unsigned reg, const TargetRegisterInfo *tri = 0, unsigned subidx = 0)
+  explicit PrintReg(unsigned reg, const TargetRegisterInfo *tri = 0,
+                    unsigned subidx = 0)
     : TRI(tri), Reg(reg), SubIdx(subidx) {}
   void print(raw_ostream&) const;
 };
diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h
index 7ea2396076..a94278cfa3 100644
--- a/include/llvm/Target/TargetTransformImpl.h
+++ b/include/llvm/Target/TargetTransformImpl.h
@@ -15,8 +15,8 @@
 #ifndef LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
 #define LLVM_TARGET_TARGET_TRANSFORMATION_IMPL_H
 
-#include "llvm/TargetTransformInfo.h"
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/TargetTransformInfo.h"
 
 namespace llvm {
 
@@ -26,7 +26,7 @@ class TargetLowering;
 /// ScalarTargetTransformInfo interface. Different targets can implement
 /// this interface differently.
 class ScalarTargetTransformImpl : public ScalarTargetTransformInfo {
-private:
+protected:
   const TargetLowering *TLI;
 
 public:
@@ -90,6 +90,9 @@ public:
                                    unsigned Alignment,
                                    unsigned AddressSpace) const;
 
+  virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
+                                         ArrayRef<Type*> Tys) const;
+
   virtual unsigned getNumberOfParts(Type *Tp) const;
 };
 
diff --git a/include/llvm/TargetTransformInfo.h b/include/llvm/TargetTransformInfo.h
index 94db490443..519ccb9263 100644
--- a/include/llvm/TargetTransformInfo.h
+++ b/include/llvm/TargetTransformInfo.h
@@ -22,8 +22,9 @@
 #ifndef LLVM_TRANSFORMS_TARGET_TRANSFORM_INTERFACE
 #define LLVM_TRANSFORMS_TARGET_TRANSFORM_INTERFACE
 
-#include "llvm/Pass.h"
 #include "llvm/AddressingMode.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Type.h"
 
@@ -75,6 +76,18 @@ public:
 /// LSR, and LowerInvoke use this interface.
 class ScalarTargetTransformInfo {
 public:
+  /// PopcntHwSupport - Hardware support for population count. Compared to the
+  /// SW implementation, HW support is supposed to significantly boost the
+  /// performance when the population is dense, and it may or not may degrade
+  /// performance if the population is sparse. A HW support is considered as
+  /// "Fast" if it can outperform, or is on a par with, SW implementaion when
+  /// the population is sparse; otherwise, it is considered as "Slow".
+  enum PopcntHwSupport {
+    None,
+    Fast,
+    Slow
+  };
+
   virtual ~ScalarTargetTransformInfo() {}
 
   /// isLegalAddImmediate - Return true if the specified immediate is legal
@@ -122,6 +135,11 @@ public:
   virtual bool shouldBuildLookupTables() const {
     return true;
   }
+
+  /// getPopcntHwSupport - Return hardware support for population count.
+  virtual PopcntHwSupport getPopcntHwSupport(unsigned IntTyWidthInBit) const {
+    return None;
+  }
 };
 
 /// VectorTargetTransformInfo - This interface is used by the vectorizers
@@ -192,6 +210,13 @@ public:
     return 1;
   }
 
+  /// Returns the cost of Intrinsic instructions.
+  virtual unsigned getIntrinsicInstrCost(Intrinsic::ID,
+                                         Type *RetTy,
+                                         ArrayRef<Type*> Tys) const {
+    return 1;
+  }
+
   /// Returns the number of pieces into which the provided type must be
   /// split during legalization. Zero is returned when the answer is unknown.
   virtual unsigned getNumberOfParts(Type *Tp) const {
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h
index b036040f51..99232de076 100644
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@@ -42,6 +42,7 @@ struct Inliner : public CallGraphSCCPass {
   // Pass class.
   virtual bool runOnSCC(CallGraphSCC &SCC);
 
+  using llvm::Pass::doFinalization;
   // doFinalization - Remove now-dead linkonce functions at the end of
   // processing to avoid breaking the SCC traversal.
   virtual bool doFinalization(CallGraph &CG);
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 3558251a43..cdd1e03cde 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -14,6 +14,8 @@
 #ifndef LLVM_TRANSFORMS_INSTRUMENTATION_H
 #define LLVM_TRANSFORMS_INSTRUMENTATION_H
 
+#include "llvm/ADT/StringRef.h"
+
 namespace llvm {
 
 class ModulePass;
@@ -31,10 +33,19 @@ ModulePass *createPathProfilerPass();
 // Insert GCOV profiling instrumentation
 ModulePass *createGCOVProfilerPass(bool EmitNotes = true, bool EmitData = true,
                                    bool Use402Format = false,
-                                   bool UseExtraChecksum = false);
+                                   bool UseExtraChecksum = false,
+                                   bool NoRedZone = false);
 
 // Insert AddressSanitizer (address sanity checking) instrumentation
-FunctionPass *createAddressSanitizerPass();
+FunctionPass *createAddressSanitizerFunctionPass(
+    bool CheckInitOrder = false, bool CheckUseAfterReturn = false,
+    bool CheckLifetime = false, StringRef BlacklistFile = StringRef());
+ModulePass *createAddressSanitizerModulePass(
+    bool CheckInitOrder = false, StringRef BlacklistFile = StringRef());
+
+// Insert MemorySanitizer instrumentation (detection of uninitialized reads)
+FunctionPass *createMemorySanitizerPass();
+
 // Insert ThreadSanitizer (race detection) instrumentation
 FunctionPass *createThreadSanitizerPass();
 
diff --git a/include/llvm/Transforms/Utils/AddrModeMatcher.h b/include/llvm/Transforms/Utils/AddrModeMatcher.h
index 7d672839a6..1f708f27af 100644
--- a/include/llvm/Transforms/Utils/AddrModeMatcher.h
+++ b/include/llvm/Transforms/Utils/AddrModeMatcher.h
@@ -19,8 +19,8 @@
 #ifndef LLVM_TRANSFORMS_UTILS_ADDRMODEMATCHER_H
 #define LLVM_TRANSFORMS_UTILS_ADDRMODEMATCHER_H
 
-#include "llvm/AddressingMode.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/AddressingMode.h"
 #include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h
index ab9fc475fa..d5e7e87839 100644
--- a/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -81,7 +81,7 @@ namespace llvm {
   /// 'l' is added as the suffix of name, if 'Op' is a float, we add a 'f'
   /// suffix.
   Value *EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
-                              const AttrListPtr &Attrs);
+                              const AttributeSet &Attrs);
 
   /// EmitPutChar - Emit a call to the putchar function.  This assumes that Char
   /// is an integer.
diff --git a/include/llvm/Transforms/Utils/BypassSlowDivision.h b/include/llvm/Transforms/Utils/BypassSlowDivision.h
index ac8af122f0..3567ac254b 100644
--- a/include/llvm/Transforms/Utils/BypassSlowDivision.h
+++ b/include/llvm/Transforms/Utils/BypassSlowDivision.h
@@ -18,6 +18,7 @@
 #ifndef TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
 #define TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Function.h"
 
 namespace llvm {
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 1780025a27..14212f622b 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -18,9 +18,9 @@
 #ifndef LLVM_TRANSFORMS_UTILS_CLONING_H
 #define LLVM_TRANSFORMS_UTILS_CLONING_H
 
-#include "llvm/ADT/ValueMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ValueMap.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index be3029e545..702628d7cd 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -15,10 +15,10 @@
 #ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H
 #define LLVM_TRANSFORMS_UTILS_LOCAL_H
 
+#include "llvm/DataLayout.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Operator.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/DataLayout.h"
 
 namespace llvm {
 
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index db65a47e97..cd048936e0 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -15,6 +15,7 @@
 #define LLVM_TRANSFORMS_UTILS_SSAUPDATER_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
 
 namespace llvm {
   class BasicBlock;
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index 41e53a83e2..1ba4d22d5f 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -18,6 +18,7 @@
 namespace llvm {
 class BasicBlock;
 class BasicBlockPass;
+class Pass;
 
 //===----------------------------------------------------------------------===//
 /// @brief Vectorize configuration.
@@ -110,7 +111,7 @@ createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
 //
 // LoopVectorize - Create a loop vectorization pass.
 //
-Pass * createLoopVectorizePass();
+Pass *createLoopVectorizePass();
 
 //===----------------------------------------------------------------------===//
 /// @brief Vectorize the BasicBlock.
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index be218183e5..7adf54209c 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_VALUE_H
 #define LLVM_VALUE_H
 
-#include "llvm/Use.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Use.h"
 
 namespace llvm {
 
diff --git a/include/llvm/ValueSymbolTable.h b/include/llvm/ValueSymbolTable.h
index 1738cc4a7a..7493736fee 100644
--- a/include/llvm/ValueSymbolTable.h
+++ b/include/llvm/ValueSymbolTable.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_VALUE_SYMBOL_TABLE_H
 #define LLVM_VALUE_SYMBOL_TABLE_H
 
-#include "llvm/Value.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Value.h"
 
 namespace llvm {
   template<typename ValueSubClass, typename ItemParentClass>
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 752edd52b4..fd2456ff15 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -28,15 +28,15 @@
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Pass.h"
 #include "llvm/BasicBlock.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Function.h"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Type.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Pass.h"
 #include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 // Register the AliasAnalysis interface, providing a nice name to refer to.
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 9f219f5637..9f4a47c77e 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -13,9 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Pass.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Assembly/Writer.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index ac72983a8d..6a00e67624 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -17,19 +17,19 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/InstIterator.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SetVector.h"
 using namespace llvm;
 
 static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index f15c05153e..dd4c492748 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -17,12 +17,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Instructions.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include <set>
 using namespace llvm;
 
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 388c755cbd..9fb861ee43 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -13,17 +13,17 @@
 
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 /// mergeSetIn - Merge the specified alias set into this alias set.
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 9dc81a6a63..37e702f4c7 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -9,8 +9,8 @@
 
 #include "llvm-c/Analysis.h"
 #include "llvm-c/Initialization.h"
-#include "llvm/InitializePasses.h"
 #include "llvm/Analysis/Verifier.h"
+#include "llvm/InitializePasses.h"
 #include <cstring>
 
 using namespace llvm;
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index e8b3063588..b0578b9c42 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -13,9 +13,16 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalAlias.h"
@@ -25,16 +32,9 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Operator.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -1064,48 +1064,20 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
                    Location(V2, V2Size, V2TBAAInfo));
       if (PN > V2)
         std::swap(Locs.first, Locs.second);
-
-      // Find the first incoming phi value not from its parent.
-      unsigned f = 0;
-      while (PN->getIncomingBlock(f) == PN->getParent() &&
-             f < PN->getNumIncomingValues()-1)
-        ++f;
-
-      AliasResult Alias =
-        aliasCheck(PN->getIncomingValue(f), PNSize, PNTBAAInfo,
-                   PN2->getIncomingValueForBlock(PN->getIncomingBlock(f)),
-                   V2Size, V2TBAAInfo);
-      if (Alias == MayAlias)
-        return MayAlias;
-
-      // If the first source of the PHI nodes NoAlias and the other inputs are
-      // the PHI node itself through some amount of recursion this does not add
-      // any new information so just return NoAlias.
-      // bb:
-      //    ptr = ptr2 + 1
-      // loop:
-      //    ptr_phi = phi [bb, ptr], [loop, ptr_plus_one]
-      //    ptr2_phi = phi [bb, ptr2], [loop, ptr2_plus_one]
-      //    ...
-      //    ptr_plus_one = gep ptr_phi, 1
-      //    ptr2_plus_one = gep ptr2_phi, 1
-      // We assume for the recursion that the the phis (ptr_phi, ptr2_phi) do
-      // not alias each other.
-      bool ArePhisAssumedNoAlias = false;
-      AliasResult OrigAliasResult = NoAlias;
-      if (Alias == NoAlias) {
-        // Pretend the phis do not alias.
-        assert(AliasCache.count(Locs) &&
-               "There must exist an entry for the phi node");
-        OrigAliasResult = AliasCache[Locs];
-        AliasCache[Locs] = NoAlias;
-        ArePhisAssumedNoAlias = true;
-      }
+      // Analyse the PHIs' inputs under the assumption that the PHIs are
+      // NoAlias.
+      // If the PHIs are May/MustAlias there must be (recursively) an input
+      // operand from outside the PHIs' cycle that is MayAlias/MustAlias or
+      // there must be an operation on the PHIs within the PHIs' value cycle
+      // that causes a MayAlias.
+      // Pretend the phis do not alias.
+      AliasResult Alias = NoAlias;
+      assert(AliasCache.count(Locs) &&
+             "There must exist an entry for the phi node");
+      AliasResult OrigAliasResult = AliasCache[Locs];
+      AliasCache[Locs] = NoAlias;
 
       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-        if (i == f)
-          continue;
-
         AliasResult ThisAlias =
           aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo,
                      PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
@@ -1116,7 +1088,7 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
       }
 
       // Reset if speculation failed.
-      if (ArePhisAssumedNoAlias && Alias != NoAlias)
+      if (Alias != NoAlias)
         AliasCache[Locs] = OrigAliasResult;
 
       return Alias;
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 8a660f737c..100e5c8ae7 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/InitializePasses.h"
-#include "llvm/Analysis/BlockFrequencyImpl.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/InitializePasses.h"
 
 using namespace llvm;
 
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 04a6560262..5f84a10165 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -11,14 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Metadata.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 76854000bd..9b6879a42e 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -18,7 +18,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CFGPrinter.h"
-
 #include "llvm/Pass.h"
 using namespace llvm;
 
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index b3a40bee42..1b67a9cf92 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -47,6 +47,7 @@ add_llvm_library(LLVMAnalysis
   ProfileVerifierPass.cpp
   ProfileDataLoader.cpp
   ProfileDataLoaderPass.cpp
+  PtrUseVisitor.cpp
   RegionInfo.cpp
   RegionPass.cpp
   RegionPrinter.cpp
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 651a54be1b..485e39a827 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Function.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Support/CallSite.h"
 
 using namespace llvm;
 
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 91a5b84e8a..657ada4274 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -17,22 +17,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/Operator.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FEnv.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/FEnv.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include <cerrno>
 #include <cmath>
 using namespace llvm;
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 5adbf45810..0d0a8933b3 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -20,10 +20,10 @@
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
-#include "llvm/TargetTransformInfo.h"
-#include "llvm/Value.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetTransformInfo.h"
+#include "llvm/Value.h"
 using namespace llvm;
 
 namespace {
@@ -40,7 +40,7 @@ namespace {
     /// Returns -1 if the cost is unknown.
     /// Note, this method does not cache the cost calculation and it
     /// can be expensive in some cases.
-    unsigned getInstructionCost(Instruction *I) const;
+    unsigned getInstructionCost(const Instruction *I) const;
 
   private:
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
@@ -82,7 +82,7 @@ CostModelAnalysis::runOnFunction(Function &F) {
  return false;
 }
 
-unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const {
+unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
   if (!VTTI)
     return -1;
 
@@ -113,7 +113,7 @@ unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const {
     return VTTI->getArithmeticInstrCost(I->getOpcode(), I->getType());
   }
   case Instruction::Select: {
-    SelectInst *SI = cast<SelectInst>(I);
+    const SelectInst *SI = cast<SelectInst>(I);
     Type *CondTy = SI->getCondition()->getType();
     return VTTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy);
   }
@@ -123,14 +123,14 @@ unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const {
     return VTTI->getCmpSelInstrCost(I->getOpcode(), ValTy);
   }
   case Instruction::Store: {
-    StoreInst *SI = cast<StoreInst>(I);
+    const StoreInst *SI = cast<StoreInst>(I);
     Type *ValTy = SI->getValueOperand()->getType();
     return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
                                  SI->getAlignment(),
                                  SI->getPointerAddressSpace());
   }
   case Instruction::Load: {
-    LoadInst *LI = cast<LoadInst>(I);
+    const LoadInst *LI = cast<LoadInst>(I);
     return VTTI->getMemoryOpCost(I->getOpcode(), I->getType(),
                                  LI->getAlignment(),
                                  LI->getPointerAddressSpace());
@@ -151,7 +151,7 @@ unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const {
     return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy);
   }
   case Instruction::ExtractElement: {
-    ExtractElementInst * EEI = cast<ExtractElementInst>(I);
+    const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
     ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
     unsigned Idx = -1;
     if (CI)
@@ -160,7 +160,7 @@ unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const {
                                     EEI->getOperand(0)->getType(), Idx);
   }
   case Instruction::InsertElement: {
-      InsertElementInst * IE = cast<InsertElementInst>(I);
+      const InsertElementInst * IE = cast<InsertElementInst>(I);
       ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
       unsigned Idx = -1;
       if (CI)
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index 41cd34c07b..fdeb95bc9e 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -16,14 +16,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/Function.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Metadata.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index 684da98ce2..a91d9b6999 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -55,12 +55,12 @@
 
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Operator.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InstIterator.h"
@@ -583,42 +583,40 @@ void Dependence::dump(raw_ostream &OS) const {
     else if (isInput())
       OS << "input";
     unsigned Levels = getLevels();
-    if (Levels) {
-      OS << " [";
-      for (unsigned II = 1; II <= Levels; ++II) {
-        if (isSplitable(II))
-          Splitable = true;
-        if (isPeelFirst(II))
-          OS << 'p';
-        const SCEV *Distance = getDistance(II);
-        if (Distance)
-          OS << *Distance;
-        else if (isScalar(II))
-          OS << "S";
+    OS << " [";
+    for (unsigned II = 1; II <= Levels; ++II) {
+      if (isSplitable(II))
+        Splitable = true;
+      if (isPeelFirst(II))
+        OS << 'p';
+      const SCEV *Distance = getDistance(II);
+      if (Distance)
+        OS << *Distance;
+      else if (isScalar(II))
+        OS << "S";
+      else {
+        unsigned Direction = getDirection(II);
+        if (Direction == DVEntry::ALL)
+          OS << "*";
         else {
-          unsigned Direction = getDirection(II);
-          if (Direction == DVEntry::ALL)
-            OS << "*";
-          else {
-            if (Direction & DVEntry::LT)
-              OS << "<";
-            if (Direction & DVEntry::EQ)
-              OS << "=";
-            if (Direction & DVEntry::GT)
-              OS << ">";
-          }
+          if (Direction & DVEntry::LT)
+            OS << "<";
+          if (Direction & DVEntry::EQ)
+            OS << "=";
+          if (Direction & DVEntry::GT)
+            OS << ">";
         }
-        if (isPeelLast(II))
-          OS << 'p';
-        if (II < Levels)
-          OS << " ";
       }
-      if (isLoopIndependent())
-        OS << "|<";
-      OS << "]";
-      if (Splitable)
-        OS << " splitable";
+      if (isPeelLast(II))
+        OS << 'p';
+      if (II < Levels)
+        OS << " ";
     }
+    if (isLoopIndependent())
+      OS << "|<";
+    OS << "]";
+    if (Splitable)
+      OS << " splitable";
   }
   OS << "!\n";
 }
@@ -2212,7 +2210,7 @@ const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) {
 //
 // It occurs to me that the presence of loop-invariant variables
 // changes the nature of the test from "greatest common divisor"
-// to "a common divisor!"
+// to "a common divisor".
 bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
                                     const SCEV *Dst,
                                     FullDependence &Result) const {
@@ -3199,6 +3197,9 @@ static void dumpSmallBitVector(SmallBitVector &BV) {
 Dependence *DependenceAnalysis::depends(Instruction *Src,
                                         Instruction *Dst,
                                         bool PossiblyLoopIndependent) {
+  if (Src == Dst)
+    PossiblyLoopIndependent = false;
+
   if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
       (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
     // if both instructions don't reference memory, there's no dependence
@@ -3552,7 +3553,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
     }
   }
 
-  // make sure Scalar flags are set correctly
+  // Make sure the Scalar flags are set correctly.
   SmallBitVector CompleteLoops(MaxLevels + 1);
   for (unsigned SI = 0; SI < Pairs; ++SI)
     CompleteLoops |= Pair[SI].Loops;
@@ -3560,8 +3561,10 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
     if (CompleteLoops[II])
       Result.DV[II - 1].Scalar = false;
 
-  // make sure loopIndepent flag is set correctly
   if (PossiblyLoopIndependent) {
+    // Make sure the LoopIndependent flag is set correctly.
+    // All directions must include equal, otherwise no
+    // loop-independent dependence is possible.
     for (unsigned II = 1; II <= CommonLevels; ++II) {
       if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
         Result.LoopIndependent = false;
@@ -3569,6 +3572,19 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
       }
     }
   }
+  else {
+    // On the other hand, if all directions are equal and there's no
+    // loop-independent dependence possible, then no dependence exists.
+    bool AllEqual = true;
+    for (unsigned II = 1; II <= CommonLevels; ++II) {
+      if (Result.getDirection(II) != Dependence::DVEntry::EQ) {
+        AllEqual = false;
+        break;
+      }
+    }
+    if (AllEqual)
+      return NULL;
+  }
 
   FullDependence *Final = new FullDependence(Result);
   Result.DV = NULL;
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index 3e537e9f1a..7e4a89f1bd 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/DominanceFrontier.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index dec0eced27..f5cbc9413f 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -13,9 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CallGraph.h"
-#include "llvm/Module.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index f486937654..40b034f434 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -17,12 +17,12 @@
 
 #define DEBUG_TYPE "cgscc-passmgr"
 #include "llvm/CallGraphSCCPass.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Function.h"
-#include "llvm/PassManagers.h"
-#include "llvm/Analysis/CallGraph.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/PassManagers.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index e9df3ca010..b4623d2e34 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -14,10 +14,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/FindUsedTypes.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 990caa80c8..c793cc0abc 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -16,20 +16,20 @@
 
 #define DEBUG_TYPE "globalsmodref-aa"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Instructions.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/InstIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SCCIterator.h"
 #include <set>
 using namespace llvm;
 
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index d4221b89e0..62dd28f8d0 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -14,19 +14,19 @@
 
 #define DEBUG_TYPE "iv-users"
 #include "llvm/Analysis/IVUsers.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/Type.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Assembly/Writer.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Type.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 458e25503d..c2e9a65014 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -13,23 +13,23 @@
 
 #define DEBUG_TYPE "inline-cost"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DataLayout.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/CallingConv.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/DataLayout.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
 
 using namespace llvm;
 
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 3b385d26ba..9228c97d46 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -13,13 +13,13 @@
 
 #define DEBUG_TYPE "instcount"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Function.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(TotalInsts , "Number of instructions (of all types)");
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index a76e5ad1b8..00689475a4 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -18,20 +18,20 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "instsimplify"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Operator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DataLayout.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/ValueHandle.h"
-#include "llvm/DataLayout.h"
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
@@ -886,6 +886,33 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                            RecursionLimit);
 }
 
+/// Given the operands for an FMul, see if we can fold the result
+static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
+                               FastMathFlags FMF,
+                               const Query &Q,
+                               unsigned MaxRecurse) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::FMul, CLHS->getType(),
+                                      Ops, Q.TD, Q.TLI);
+    }
+ }
+
+ // Check for some fast-math optimizations
+ if (FMF.noNaNs()) {
+   if (FMF.noSignedZeros()) {
+     // fmul N S 0, x ==> 0
+     if (match(Op0, m_Zero()))
+       return Op0;
+     if (match(Op1, m_Zero()))
+       return Op1;
+   }
+ }
+
+ return 0;
+}
+
 /// SimplifyMulInst - Given operands for a Mul, see if we can
 /// fold the result.  If not, this returns null.
 static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
@@ -951,6 +978,14 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
   return 0;
 }
 
+Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1,
+                              FastMathFlags FMF,
+                              const DataLayout *TD,
+                              const TargetLibraryInfo *TLI,
+                              const DominatorTree *DT) {
+  return ::SimplifyFMulInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit);
+}
+
 Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *TD,
                              const TargetLibraryInfo *TLI,
                              const DominatorTree *DT) {
@@ -2799,6 +2834,10 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD,
                              cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
                              TD, TLI, DT);
     break;
+  case Instruction::FMul:
+    Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1),
+                              I->getFastMathFlags(), TD, TLI, DT);
+    break;
   case Instruction::Mul:
     Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT);
     break;
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 2b87d80d37..e2c9d554d0 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -13,23 +13,23 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "lazy-value-info"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/PatternMatch.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include <map>
 #include <stack>
 using namespace llvm;
diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp
index efb722bb97..f6c70f0066 100644
--- a/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LibCallAliasAnalysis.h"
-#include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Function.h"
 #include "llvm/Pass.h"
 using namespace llvm;
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 6d6d580ed1..c204e9fbfb 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -34,26 +34,26 @@
 // 
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Lint.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/Lint.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Function.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
 #include "llvm/PassManager.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Function.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 8341f9d830..1457bea014 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -15,18 +15,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfoImpl.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Assembly/Writer.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 8578a63bee..b268a461a5 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -10,15 +10,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Assembly/Writer.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/Support/InstIterator.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SetVector.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 0a539fe758..0c6568a093 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -13,19 +13,19 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "memory-builtins"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DataLayout.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/Metadata.h"
 #include "llvm/Module.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 9872890494..249b4e3dee 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -16,21 +16,21 @@
 
 #define DEBUG_TYPE "memdep"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/PredIteratorCache.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/DataLayout.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/PredIteratorCache.h"
 using namespace llvm;
 
 STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index f8c7514819..a8d143c1cb 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -16,13 +16,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/Function.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index 2eb4137c53..6506e11bfc 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -12,10 +12,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/DataLayout.h"
+#include "llvm/Pass.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index c35737e472..76ff9e35d4 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -12,11 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp
index d4ad726481..466c04b898 100644
--- a/lib/Analysis/PathNumbering.cpp
+++ b/lib/Analysis/PathNumbering.cpp
@@ -31,18 +31,17 @@
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/TypeBuilder.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-
+#include "llvm/TypeBuilder.h"
 #include <queue>
+#include <sstream>
 #include <stack>
 #include <string>
 #include <utility>
-#include <sstream>
 
 using namespace llvm;
 
diff --git a/lib/Analysis/PathProfileInfo.cpp b/lib/Analysis/PathProfileInfo.cpp
index b361d3f4fa..2feb7ddace 100644
--- a/lib/Analysis/PathProfileInfo.cpp
+++ b/lib/Analysis/PathProfileInfo.cpp
@@ -13,15 +13,14 @@
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "path-profile-info"
 
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/Analysis/PathProfileInfo.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-
 #include <cstdio>
 
 using namespace llvm;
diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp
index 0fcdfe75ae..0eb2568ab6 100644
--- a/lib/Analysis/PathProfileVerifier.cpp
+++ b/lib/Analysis/PathProfileVerifier.cpp
@@ -13,15 +13,14 @@
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "path-profile-verifier"
 
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
 #include "llvm/Analysis/PathProfileInfo.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-
 #include <stdio.h>
 
 using namespace llvm;
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index 6ed2729792..7b5947ad45 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -14,13 +14,13 @@
 #define DEBUG_TYPE "postdomtree"
 
 #include "llvm/Analysis/PostDominators.h"
-#include "llvm/Instructions.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SetOperations.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/ProfileDataLoader.cpp b/lib/Analysis/ProfileDataLoader.cpp
index a4f634af53..24fe81bb5f 100644
--- a/lib/Analysis/ProfileDataLoader.cpp
+++ b/lib/Analysis/ProfileDataLoader.cpp
@@ -12,12 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/ProfileDataLoader.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/Module.h"
-#include "llvm/InstrTypes.h"
-#include "llvm/Analysis/ProfileDataLoader.h"
 #include "llvm/Analysis/ProfileDataTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Module.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
 #include <cstdio>
diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp
index c43cff05a4..604541f4fd 100644
--- a/lib/Analysis/ProfileDataLoaderPass.cpp
+++ b/lib/Analysis/ProfileDataLoaderPass.cpp
@@ -15,22 +15,22 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "profile-metadata-loader"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileDataLoader.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/InstrTypes.h"
-#include "llvm/Module.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/MDBuilder.h"
 #include "llvm/Metadata.h"
+#include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileDataLoader.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Format.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 STATISTIC(NumEdgesRead, "The # of edges read.");
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index 12b59e0a6f..b284b995ac 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -12,14 +12,14 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "profile-estimator"
-#include "llvm/Pass.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 static cl::opt<double>
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index b5b7ac1e50..2daa7d4f6b 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -12,16 +12,16 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "profile-info"
-#include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/ADT/SmallSet.h"
-#include <set>
-#include <queue>
 #include <limits>
+#include <queue>
+#include <set>
 using namespace llvm;
 
 namespace llvm {
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
index 5c7c97cad1..4a9e17cd86 100644
--- a/lib/Analysis/ProfileInfoLoader.cpp
+++ b/lib/Analysis/ProfileInfoLoader.cpp
@@ -14,8 +14,8 @@
 
 #include "llvm/Analysis/ProfileInfoLoader.h"
 #include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/Module.h"
 #include "llvm/InstrTypes.h"
+#include "llvm/Module.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdio>
 #include <cstdlib>
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 5ecf052a1a..2f3486cb44 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -12,20 +12,20 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "profile-loader"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/InstrTypes.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Format.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/Support/raw_ostream.h"
 #include <set>
 using namespace llvm;
 
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index 0cb158865a..0456b03626 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -12,17 +12,18 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "profile-verifier"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/Debug.h"
 #include <set>
 using namespace llvm;
 
diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp
new file mode 100644
index 0000000000..b48e86ea6e
--- /dev/null
+++ b/lib/Analysis/PtrUseVisitor.cpp
@@ -0,0 +1,58 @@
+//===- PtrUseVisitor.cpp - InstVisitors over a pointers uses --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Implementation of the pointer use visitors.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PtrUseVisitor.h"
+
+using namespace llvm;
+
+void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) {
+  for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
+       UI != UE; ++UI) {
+    if (VisitedUses.insert(&UI.getUse())) {
+      UseToVisit NewU = {
+        UseToVisit::UseAndIsOffsetKnownPair(&UI.getUse(), IsOffsetKnown),
+        Offset
+      };
+      Worklist.push_back(llvm_move(NewU));
+    }
+  }
+}
+
+bool detail::PtrUseVisitorBase::adjustOffsetForGEP(GetElementPtrInst &GEPI) {
+  if (!IsOffsetKnown)
+    return false;
+
+  for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI);
+       GTI != GTE; ++GTI) {
+    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+    if (!OpC)
+      return false;
+    if (OpC->isZero())
+      continue;
+
+    // Handle a struct index, which adds its field offset to the pointer.
+    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+      unsigned ElementIdx = OpC->getZExtValue();
+      const StructLayout *SL = DL.getStructLayout(STy);
+      Offset += APInt(Offset.getBitWidth(),
+                         SL->getElementOffset(ElementIdx));
+      continue;
+    }
+
+    // For array or vector indices, scale the index by the size of the type.
+    APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
+    Offset += Index * APInt(Offset.getBitWidth(),
+                               DL.getTypeAllocSize(GTI.getIndexedType()));
+  }
+  return true;
+}
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index 30f0d2f10d..fad5074086 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -10,14 +10,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/RegionInfo.h"
-#include "llvm/Analysis/RegionIterator.h"
-
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
 
 #define DEBUG_TYPE "region"
 #include "llvm/Support/Debug.h"
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index 8b23cc7042..c5f1b92592 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -9,16 +9,16 @@
 // Print out the region tree of a function using dotty/graphviz.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/Passes.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
 #include "llvm/Analysis/RegionInfo.h"
 #include "llvm/Analysis/RegionIterator.h"
 #include "llvm/Analysis/RegionPrinter.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/DOTGraphTraitsPass.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 5f60bd1674..f11cd15bf3 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -59,22 +59,25 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "scalar-evolution"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Operator.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Assembly/Writer.h"
+#include "llvm/Constants.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/Debug.h"
@@ -83,9 +86,7 @@
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -6120,8 +6121,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
       getTypeSizeInBits(ICI->getOperand(0)->getType()))
     return false;
 
-  // Now that we found a conditional branch that dominates the loop, check to
-  // see if it is the comparison we are looking for.
+  // Now that we found a conditional branch that dominates the loop or controls
+  // the loop latch. Check to see if it is the comparison we are looking for.
   ICmpInst::Predicate FoundPred;
   if (Inverse)
     FoundPred = ICI->getInversePredicate();
@@ -6151,7 +6152,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
       return CmpInst::isTrueWhenEqual(Pred);
   if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
     if (FoundLHS == FoundRHS)
-      return CmpInst::isFalseWhenEqual(Pred);
+      return CmpInst::isFalseWhenEqual(FoundPred);
 
   // Check to see if we can make the LHS or RHS match.
   if (LHS == FoundRHS || RHS == FoundLHS) {
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index e9edb3e083..79c5f0deb0 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -19,9 +19,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/Passes.h"
 #include "llvm/Pass.h"
 using namespace llvm;
 
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 111bfb4a6a..2fe0091f5e 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -14,13 +14,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/DataLayout.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/STLExtras.h"
 
 using namespace llvm;
 
diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp
index 22da857620..39c8282043 100644
--- a/lib/Analysis/Trace.cpp
+++ b/lib/Analysis/Trace.cpp
@@ -16,8 +16,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/Trace.h"
-#include "llvm/Function.h"
 #include "llvm/Assembly/Writer.h"
+#include "llvm/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 0faf1398ec..92a0002edd 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -57,12 +57,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Constants.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
 #include "llvm/Metadata.h"
+#include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 using namespace llvm;
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 3beb373dc5..13313e753b 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -13,21 +13,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/DataLayout.h"
 #include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Metadata.h"
 #include "llvm/Operator.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/PatternMatch.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include <cstring>
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -862,6 +862,72 @@ bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero,
   return false;
 }
 
+/// \brief Test whether a GEP's result is known to be non-null.
+///
+/// Uses properties inherent in a GEP to try to determine whether it is known
+/// to be non-null.
+///
+/// Currently this routine does not support vector GEPs.
+static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL,
+                              unsigned Depth) {
+  if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
+    return false;
+
+  // FIXME: Support vector-GEPs.
+  assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
+
+  // If the base pointer is non-null, we cannot walk to a null address with an
+  // inbounds GEP in address space zero.
+  if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth))
+    return true;
+
+  // Past this, if we don't have DataLayout, we can't do much.
+  if (!DL)
+    return false;
+
+  // Walk the GEP operands and see if any operand introduces a non-zero offset.
+  // If so, then the GEP cannot produce a null pointer, as doing so would
+  // inherently violate the inbounds contract within address space zero.
+  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+       GTI != GTE; ++GTI) {
+    // Struct types are easy -- they must always be indexed by a constant.
+    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+      ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
+      unsigned ElementIdx = OpC->getZExtValue();
+      const StructLayout *SL = DL->getStructLayout(STy);
+      uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
+      if (ElementOffset > 0)
+        return true;
+      continue;
+    }
+
+    // If we have a zero-sized type, the index doesn't matter. Keep looping.
+    if (DL->getTypeAllocSize(GTI.getIndexedType()) == 0)
+      continue;
+
+    // Fast path the constant operand case both for efficiency and so we don't
+    // increment Depth when just zipping down an all-constant GEP.
+    if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
+      if (!OpC->isZero())
+        return true;
+      continue;
+    }
+
+    // We post-increment Depth here because while isKnownNonZero increments it
+    // as well, when we pop back up that increment won't persist. We don't want
+    // to recurse 10k times just because we have 10k GEP operands. We don't
+    // bail completely out because we want to handle constant GEPs regardless
+    // of depth.
+    if (Depth++ >= MaxDepth)
+      continue;
+
+    if (isKnownNonZero(GTI.getOperand(), DL, Depth))
+      return true;
+  }
+
+  return false;
+}
+
 /// isKnownNonZero - Return true if the given value is known to be non-zero
 /// when defined.  For vectors return true if every element is known to be
 /// non-zero when defined.  Supports values with integer or pointer type and
@@ -881,6 +947,13 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
   if (Depth++ >= MaxDepth)
     return false;
 
+  // Check for pointer simplifications.
+  if (V->getType()->isPointerTy()) {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+      if (isGEPKnownNonNull(GEP, TD, Depth))
+        return true;
+  }
+
   unsigned BitWidth = getBitWidth(V->getType(), TD);
 
   // X | Y != 0 if X != 0 or Y != 0.
@@ -1312,7 +1385,12 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
 
   const Operator *I = dyn_cast<Operator>(V);
   if (I == 0) return false;
-  
+
+  // Check if the nsz fast-math flag is set
+  if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I))
+    if (FPO->hasNoSignedZeros())
+      return true;
+
   // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
   if (I->getOpcode() == Instruction::FAdd &&
       isa<ConstantFP>(I->getOperand(1)) && 
diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp
index 1eab27d3eb..9f1b5e9f2e 100644
--- a/lib/Archive/Archive.cpp
+++ b/lib/Archive/Archive.cpp
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Bitcode/Archive.h"
 #include "ArchiveInternals.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/Module.h"
@@ -19,8 +20,8 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/system_error.h"
-#include <memory>
 #include <cstring>
+#include <memory>
 using namespace llvm;
 
 // getMemberSize - compute the actual physical size of the file member as seen
diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h
index 639f5ac269..f6c87e899f 100644
--- a/lib/Archive/ArchiveInternals.h
+++ b/lib/Archive/ArchiveInternals.h
@@ -14,10 +14,9 @@
 #ifndef LIB_ARCHIVE_ARCHIVEINTERNALS_H
 #define LIB_ARCHIVE_ARCHIVEINTERNALS_H
 
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Bitcode/Archive.h"
 #include "llvm/Support/TimeValue.h"
-#include "llvm/ADT/StringExtras.h"
-
 #include <cstring>
 
 #define ARFILE_MAGIC "!<arch>\n"                   ///< magic string
diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp
index 5052495c0d..527a72f945 100644
--- a/lib/Archive/ArchiveReader.cpp
+++ b/lib/Archive/ArchiveReader.cpp
@@ -11,11 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Bitcode/Archive.h"
 #include "ArchiveInternals.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Module.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include <cstdio>
 #include <cstdlib>
 #include <memory>
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index ec6b4b8758..b9f7b2f1d0 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -11,18 +11,19 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Bitcode/Archive.h"
 #include "ArchiveInternals.h"
-#include "llvm/Module.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Module.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/system_error.h"
 #include <fstream>
-#include <ostream>
 #include <iomanip>
+#include <ostream>
 using namespace llvm;
 
 // Write an integer using variable bit rate encoding. This saves a few bytes
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index d8ffe8fb73..350eef2b19 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -12,14 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "LLLexer.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Assembly/Parser.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instruction.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Assembly/Parser.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cctype>
@@ -402,7 +402,7 @@ lltok::Kind LLLexer::LexExclaim() {
   }
   return lltok::exclaim;
 }
-  
+
 /// LexIdentifier: Handle several related productions:
 ///    Label           [-a-zA-Z$._0-9]+:
 ///    IntegerType     i[0-9]+
@@ -486,7 +486,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(target);
   KEYWORD(triple);
   KEYWORD(unwind);
-  KEYWORD(deplibs);
+  KEYWORD(deplibs);             // FIXME: Remove in 4.0.
   KEYWORD(datalayout);
   KEYWORD(volatile);
   KEYWORD(atomic);
@@ -498,6 +498,11 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(seq_cst);
   KEYWORD(singlethread);
 
+  KEYWORD(nnan)
+  KEYWORD(ninf)
+  KEYWORD(nsz)
+  KEYWORD(arcp)
+  KEYWORD(fast)
   KEYWORD(nuw);
   KEYWORD(nsw);
   KEYWORD(exact);
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 09aea5b018..1a307a8bec 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -15,8 +15,8 @@
 #define LIB_ASMPARSER_LLLEXER_H
 
 #include "LLToken.h"
-#include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
 #include "llvm/Support/SourceMgr.h"
 #include <string>
 
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 41cca68576..16b955d58f 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "LLParser.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/AutoUpgrade.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
@@ -20,10 +21,9 @@
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Operator.h"
-#include "llvm/ValueSymbolTable.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ValueSymbolTable.h"
 using namespace llvm;
 
 static std::string getTypeString(Type *T) {
@@ -267,6 +267,7 @@ bool LLParser::ParseTargetDefinition() {
 /// toplevelentity
 ///   ::= 'deplibs' '=' '[' ']'
 ///   ::= 'deplibs' '=' '[' STRINGCONSTANT (',' STRINGCONSTANT)* ']'
+/// FIXME: Remove in 4.0. Currently parse, but ignore.
 bool LLParser::ParseDepLibs() {
   assert(Lex.getKind() == lltok::kw_deplibs);
   Lex.Lex();
@@ -277,14 +278,10 @@ bool LLParser::ParseDepLibs() {
   if (EatIfPresent(lltok::rsquare))
     return false;
 
-  std::string Str;
-  if (ParseStringConstant(Str)) return true;
-  M->addLibrary(Str);
-
-  while (EatIfPresent(lltok::comma)) {
+  do {
+    std::string Str;
     if (ParseStringConstant(Str)) return true;
-    M->addLibrary(Str);
-  }
+  } while (EatIfPresent(lltok::comma));
 
   return ParseToken(lltok::rsquare, "expected ']' at end of list");
 }
@@ -915,11 +912,8 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
          ParseToken(lltok::rparen, "expected ')' in address space");
 }
 
-/// ParseOptionalAttrs - Parse a potentially empty attribute list.  AttrKind
-/// indicates what kind of attribute list this is: 0: function arg, 1: result,
-/// 2: function attr.
-bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) {
-  LocTy AttrLoc = Lex.getLoc();
+/// ParseOptionalFuncAttrs - Parse a potentially empty list of function attributes.
+bool LLParser::ParseOptionalFuncAttrs(AttrBuilder &B) {
   bool HaveError = false;
 
   B.clear();
@@ -929,42 +923,70 @@ bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) {
     switch (Token) {
     default:  // End of attributes.
       return HaveError;
-    case lltok::kw_zeroext:         B.addAttribute(Attributes::ZExt); break;
-    case lltok::kw_signext:         B.addAttribute(Attributes::SExt); break;
-    case lltok::kw_inreg:           B.addAttribute(Attributes::InReg); break;
-    case lltok::kw_sret:            B.addAttribute(Attributes::StructRet); break;
-    case lltok::kw_noalias:         B.addAttribute(Attributes::NoAlias); break;
-    case lltok::kw_nocapture:       B.addAttribute(Attributes::NoCapture); break;
-    case lltok::kw_byval:           B.addAttribute(Attributes::ByVal); break;
-    case lltok::kw_nest:            B.addAttribute(Attributes::Nest); break;
-
+    case lltok::kw_alignstack: {
+      unsigned Alignment;
+      if (ParseOptionalStackAlignment(Alignment))
+        return true;
+      B.addStackAlignmentAttr(Alignment);
+      continue;
+    }
+    case lltok::kw_align: {
+      // As a hack, we allow "align 2" on functions as a synonym for "alignstack
+      // 2".
+      unsigned Alignment;
+      if (ParseOptionalAlignment(Alignment))
+        return true;
+      B.addAlignmentAttr(Alignment);
+      continue;
+    }
+    case lltok::kw_address_safety:  B.addAttribute(Attributes::AddressSafety); break;
+    case lltok::kw_alwaysinline:    B.addAttribute(Attributes::AlwaysInline); break;
+    case lltok::kw_inlinehint:      B.addAttribute(Attributes::InlineHint); break;
+    case lltok::kw_minsize:         B.addAttribute(Attributes::MinSize); break;
+    case lltok::kw_naked:           B.addAttribute(Attributes::Naked); break;
+    case lltok::kw_noinline:        B.addAttribute(Attributes::NoInline); break;
+    case lltok::kw_nonlazybind:     B.addAttribute(Attributes::NonLazyBind); break;
+    case lltok::kw_noredzone:       B.addAttribute(Attributes::NoRedZone); break;
+    case lltok::kw_noimplicitfloat: B.addAttribute(Attributes::NoImplicitFloat); break;
     case lltok::kw_noreturn:        B.addAttribute(Attributes::NoReturn); break;
     case lltok::kw_nounwind:        B.addAttribute(Attributes::NoUnwind); break;
-    case lltok::kw_uwtable:         B.addAttribute(Attributes::UWTable); break;
-    case lltok::kw_returns_twice:   B.addAttribute(Attributes::ReturnsTwice); break;
-    case lltok::kw_noinline:        B.addAttribute(Attributes::NoInline); break;
+    case lltok::kw_optsize:         B.addAttribute(Attributes::OptimizeForSize); break;
     case lltok::kw_readnone:        B.addAttribute(Attributes::ReadNone); break;
     case lltok::kw_readonly:        B.addAttribute(Attributes::ReadOnly); break;
-    case lltok::kw_inlinehint:      B.addAttribute(Attributes::InlineHint); break;
-    case lltok::kw_alwaysinline:    B.addAttribute(Attributes::AlwaysInline); break;
-    case lltok::kw_optsize:         B.addAttribute(Attributes::OptimizeForSize); break;
+    case lltok::kw_returns_twice:   B.addAttribute(Attributes::ReturnsTwice); break;
     case lltok::kw_ssp:             B.addAttribute(Attributes::StackProtect); break;
     case lltok::kw_sspreq:          B.addAttribute(Attributes::StackProtectReq); break;
-    case lltok::kw_noredzone:       B.addAttribute(Attributes::NoRedZone); break;
-    case lltok::kw_noimplicitfloat: B.addAttribute(Attributes::NoImplicitFloat); break;
-    case lltok::kw_naked:           B.addAttribute(Attributes::Naked); break;
-    case lltok::kw_nonlazybind:     B.addAttribute(Attributes::NonLazyBind); break;
-    case lltok::kw_address_safety:  B.addAttribute(Attributes::AddressSafety); break;
-    case lltok::kw_minsize:         B.addAttribute(Attributes::MinSize); break;
+    case lltok::kw_uwtable:         B.addAttribute(Attributes::UWTable); break;
 
-    case lltok::kw_alignstack: {
-      unsigned Alignment;
-      if (ParseOptionalStackAlignment(Alignment))
-        return true;
-      B.addStackAlignmentAttr(Alignment);
-      continue;
+    // Error handling.
+    case lltok::kw_zeroext:
+    case lltok::kw_signext:
+    case lltok::kw_inreg:
+      HaveError |= Error(Lex.getLoc(), "invalid use of attribute on a function");
+      break;
+    case lltok::kw_sret:      case lltok::kw_noalias:
+    case lltok::kw_nocapture: case lltok::kw_byval:
+    case lltok::kw_nest:
+      HaveError |=
+        Error(Lex.getLoc(), "invalid use of parameter-only attribute on a function");
+      break;
     }
 
+    Lex.Lex();
+  }
+}
+
+/// ParseOptionalParamAttrs - Parse a potentially empty list of parameter attributes.
+bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
+  bool HaveError = false;
+
+  B.clear();
+
+  while (1) {
+    lltok::Kind Token = Lex.getKind();
+    switch (Token) {
+    default:  // End of attributes.
+      return HaveError;
     case lltok::kw_align: {
       unsigned Alignment;
       if (ParseOptionalAlignment(Alignment))
@@ -972,51 +994,65 @@ bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) {
       B.addAlignmentAttr(Alignment);
       continue;
     }
+    case lltok::kw_byval:           B.addAttribute(Attributes::ByVal); break;
+    case lltok::kw_inreg:           B.addAttribute(Attributes::InReg); break;
+    case lltok::kw_nest:            B.addAttribute(Attributes::Nest); break;
+    case lltok::kw_noalias:         B.addAttribute(Attributes::NoAlias); break;
+    case lltok::kw_nocapture:       B.addAttribute(Attributes::NoCapture); break;
+    case lltok::kw_signext:         B.addAttribute(Attributes::SExt); break;
+    case lltok::kw_sret:            B.addAttribute(Attributes::StructRet); break;
+    case lltok::kw_zeroext:         B.addAttribute(Attributes::ZExt); break;
 
+    case lltok::kw_noreturn:       case lltok::kw_nounwind:
+    case lltok::kw_uwtable:        case lltok::kw_returns_twice:
+    case lltok::kw_noinline:       case lltok::kw_readnone:
+    case lltok::kw_readonly:       case lltok::kw_inlinehint:
+    case lltok::kw_alwaysinline:   case lltok::kw_optsize:
+    case lltok::kw_ssp:            case lltok::kw_sspreq:
+    case lltok::kw_noredzone:      case lltok::kw_noimplicitfloat:
+    case lltok::kw_naked:          case lltok::kw_nonlazybind:
+    case lltok::kw_address_safety: case lltok::kw_minsize:
+    case lltok::kw_alignstack:
+      HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
+      break;
     }
 
-    // Perform some error checking.
+    Lex.Lex();
+  }
+}
+
+/// ParseOptionalReturnAttrs - Parse a potentially empty list of return attributes.
+bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
+  bool HaveError = false;
+
+  B.clear();
+
+  while (1) {
+    lltok::Kind Token = Lex.getKind();
     switch (Token) {
-    default:
-      if (AttrKind == 2)
-        HaveError |= Error(AttrLoc, "invalid use of attribute on a function");
-      break;
-    case lltok::kw_align:
-      // As a hack, we allow "align 2" on functions as a synonym for
-      // "alignstack 2".
-      break;
+    default:  // End of attributes.
+      return HaveError;
+    case lltok::kw_inreg:           B.addAttribute(Attributes::InReg); break;
+    case lltok::kw_noalias:         B.addAttribute(Attributes::NoAlias); break;
+    case lltok::kw_signext:         B.addAttribute(Attributes::SExt); break;
+    case lltok::kw_zeroext:         B.addAttribute(Attributes::ZExt); break;
 
-    // Parameter Only:
-    case lltok::kw_sret:
-    case lltok::kw_nocapture:
-    case lltok::kw_byval:
-    case lltok::kw_nest:
-      if (AttrKind != 0)
-        HaveError |= Error(AttrLoc, "invalid use of parameter-only attribute");
+    // Error handling.
+    case lltok::kw_sret:  case lltok::kw_nocapture:
+    case lltok::kw_byval: case lltok::kw_nest:
+      HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
       break;
-
-    // Function Only:
-    case lltok::kw_noreturn:
-    case lltok::kw_nounwind:
-    case lltok::kw_readnone:
-    case lltok::kw_readonly:
-    case lltok::kw_noinline:
-    case lltok::kw_alwaysinline:
-    case lltok::kw_optsize:
-    case lltok::kw_ssp:
-    case lltok::kw_sspreq:
-    case lltok::kw_noredzone:
-    case lltok::kw_noimplicitfloat:
-    case lltok::kw_naked:
-    case lltok::kw_inlinehint:
-    case lltok::kw_alignstack:
-    case lltok::kw_uwtable:
-    case lltok::kw_nonlazybind:
-    case lltok::kw_returns_twice:
-    case lltok::kw_address_safety:
-    case lltok::kw_minsize:
-      if (AttrKind != 2)
-        HaveError |= Error(AttrLoc, "invalid use of function-only attribute");
+    case lltok::kw_noreturn:       case lltok::kw_nounwind:
+    case lltok::kw_uwtable:        case lltok::kw_returns_twice:
+    case lltok::kw_noinline:       case lltok::kw_readnone:
+    case lltok::kw_readonly:       case lltok::kw_inlinehint:
+    case lltok::kw_alwaysinline:   case lltok::kw_optsize:
+    case lltok::kw_ssp:            case lltok::kw_sspreq:
+    case lltok::kw_noredzone:      case lltok::kw_noimplicitfloat:
+    case lltok::kw_naked:          case lltok::kw_nonlazybind:
+    case lltok::kw_address_safety: case lltok::kw_minsize:
+    case lltok::kw_alignstack:     case lltok::kw_align:
+      HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
       break;
     }
 
@@ -1447,7 +1483,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
       return true;
 
     // Otherwise, handle normal operands.
-    if (ParseOptionalAttrs(ArgAttrs, 0) || ParseValue(ArgTy, V, PFS))
+    if (ParseOptionalParamAttrs(ArgAttrs) || ParseValue(ArgTy, V, PFS))
       return true;
     ArgList.push_back(ParamInfo(ArgLoc, V, Attributes::get(V->getContext(),
                                                            ArgAttrs)));
@@ -1486,7 +1522,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
     std::string Name;
 
     if (ParseType(ArgTy) ||
-        ParseOptionalAttrs(Attrs, 0)) return true;
+        ParseOptionalParamAttrs(Attrs)) return true;
 
     if (ArgTy->isVoidTy())
       return Error(TypeLoc, "argument can not have void type");
@@ -1512,7 +1548,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
 
       // Otherwise must be an argument type.
       TypeLoc = Lex.getLoc();
-      if (ParseType(ArgTy) || ParseOptionalAttrs(Attrs, 0)) return true;
+      if (ParseType(ArgTy) || ParseOptionalParamAttrs(Attrs)) return true;
 
       if (ArgTy->isVoidTy())
         return Error(TypeLoc, "argument can not have void type");
@@ -2689,7 +2725,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   if (ParseOptionalLinkage(Linkage) ||
       ParseOptionalVisibility(Visibility) ||
       ParseOptionalCallingConv(CC) ||
-      ParseOptionalAttrs(RetAttrs, 1) ||
+      ParseOptionalReturnAttrs(RetAttrs) ||
       ParseType(RetType, RetTypeLoc, true /*void allowed*/))
     return true;
 
@@ -2756,7 +2792,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   if (ParseArgumentList(ArgList, isVarArg) ||
       ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
                          &UnnamedAddrLoc) ||
-      ParseOptionalAttrs(FuncAttrs, 2) ||
+      ParseOptionalFuncAttrs(FuncAttrs) ||
       (EatIfPresent(lltok::kw_section) &&
        ParseStringConstant(Section)) ||
       ParseOptionalAlignment(Alignment) ||
@@ -2777,7 +2813,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
 
   if (RetAttrs.hasAttributes())
     Attrs.push_back(
-      AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+      AttributeWithIndex::get(AttributeSet::ReturnIndex,
                               Attributes::get(RetType->getContext(),
                                               RetAttrs)));
 
@@ -2789,11 +2825,11 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
 
   if (FuncAttrs.hasAttributes())
     Attrs.push_back(
-      AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+      AttributeWithIndex::get(AttributeSet::FunctionIndex,
                               Attributes::get(RetType->getContext(),
                                               FuncAttrs)));
 
-  AttrListPtr PAL = AttrListPtr::get(Context, Attrs);
+  AttributeSet PAL = AttributeSet::get(Context, Attrs);
 
   if (PAL.getParamAttributes(1).hasAttribute(Attributes::StructRet) &&
       !RetType->isVoidTy())
@@ -3012,7 +3048,17 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   }
   case lltok::kw_fadd:
   case lltok::kw_fsub:
-  case lltok::kw_fmul:    return ParseArithmetic(Inst, PFS, KeywordVal, 2);
+  case lltok::kw_fmul:
+  case lltok::kw_fdiv:
+  case lltok::kw_frem: {
+    FastMathFlags FMF = EatFastMathFlagsIfPresent();
+    int Res = ParseArithmetic(Inst, PFS, KeywordVal, 2);
+    if (Res != 0)
+      return Res;
+    if (FMF.any())
+      Inst->setFastMathFlags(FMF);
+    return 0;
+  }
 
   case lltok::kw_sdiv:
   case lltok::kw_udiv:
@@ -3027,8 +3073,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
 
   case lltok::kw_urem:
   case lltok::kw_srem:   return ParseArithmetic(Inst, PFS, KeywordVal, 1);
-  case lltok::kw_fdiv:
-  case lltok::kw_frem:   return ParseArithmetic(Inst, PFS, KeywordVal, 2);
   case lltok::kw_and:
   case lltok::kw_or:
   case lltok::kw_xor:    return ParseLogical(Inst, PFS, KeywordVal);
@@ -3276,11 +3320,11 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
 
   BasicBlock *NormalBB, *UnwindBB;
   if (ParseOptionalCallingConv(CC) ||
-      ParseOptionalAttrs(RetAttrs, 1) ||
+      ParseOptionalReturnAttrs(RetAttrs) ||
       ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
       ParseValID(CalleeID) ||
       ParseParameterList(ArgList, PFS) ||
-      ParseOptionalAttrs(FnAttrs, 2) ||
+      ParseOptionalFuncAttrs(FnAttrs) ||
       ParseToken(lltok::kw_to, "expected 'to' in invoke") ||
       ParseTypeAndBasicBlock(NormalBB, PFS) ||
       ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
@@ -3314,7 +3358,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
   SmallVector<AttributeWithIndex, 8> Attrs;
   if (RetAttrs.hasAttributes())
     Attrs.push_back(
-      AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+      AttributeWithIndex::get(AttributeSet::ReturnIndex,
                               Attributes::get(Callee->getContext(),
                                               RetAttrs)));
 
@@ -3345,12 +3389,12 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
 
   if (FnAttrs.hasAttributes())
     Attrs.push_back(
-      AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+      AttributeWithIndex::get(AttributeSet::FunctionIndex,
                               Attributes::get(Callee->getContext(),
                                               FnAttrs)));
 
   // Finish off the Attributes and check them
-  AttrListPtr PAL = AttrListPtr::get(Context, Attrs);
+  AttributeSet PAL = AttributeSet::get(Context, Attrs);
 
   InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args);
   II->setCallingConv(CC);
@@ -3682,11 +3726,11 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
 
   if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) ||
       ParseOptionalCallingConv(CC) ||
-      ParseOptionalAttrs(RetAttrs, 1) ||
+      ParseOptionalReturnAttrs(RetAttrs) ||
       ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
       ParseValID(CalleeID) ||
       ParseParameterList(ArgList, PFS) ||
-      ParseOptionalAttrs(FnAttrs, 2))
+      ParseOptionalFuncAttrs(FnAttrs))
     return true;
 
   // If RetType is a non-function pointer type, then this is the short syntax
@@ -3716,7 +3760,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
   SmallVector<AttributeWithIndex, 8> Attrs;
   if (RetAttrs.hasAttributes())
     Attrs.push_back(
-      AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+      AttributeWithIndex::get(AttributeSet::ReturnIndex,
                               Attributes::get(Callee->getContext(),
                                               RetAttrs)));
 
@@ -3747,12 +3791,12 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
 
   if (FnAttrs.hasAttributes())
     Attrs.push_back(
-      AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+      AttributeWithIndex::get(AttributeSet::FunctionIndex,
                               Attributes::get(Callee->getContext(),
                                               FnAttrs)));
 
   // Finish off the Attributes and check them
-  AttrListPtr PAL = AttrListPtr::get(Context, Attrs);
+  AttributeSet PAL = AttributeSet::get(Context, Attrs);
 
   CallInst *CI = CallInst::Create(Callee, Args);
   CI->setTailCall(isTail);
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 9f9672d67b..3a38159d80 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -15,13 +15,14 @@
 #define LLVM_ASMPARSER_LLPARSER_H
 
 #include "LLLexer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/Attributes.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/ValueHandle.h"
+#include "llvm/Type.h"
 #include <map>
 
 namespace llvm {
@@ -154,6 +155,21 @@ namespace llvm {
       Lex.Lex();
       return true;
     }
+
+    FastMathFlags EatFastMathFlagsIfPresent() {
+      FastMathFlags FMF;
+      while (true)
+        switch (Lex.getKind()) {
+        case lltok::kw_fast: FMF.setUnsafeAlgebra();   Lex.Lex(); continue;
+        case lltok::kw_nnan: FMF.setNoNaNs();          Lex.Lex(); continue;
+        case lltok::kw_ninf: FMF.setNoInfs();          Lex.Lex(); continue;
+        case lltok::kw_nsz:  FMF.setNoSignedZeros();   Lex.Lex(); continue;
+        case lltok::kw_arcp: FMF.setAllowReciprocal(); Lex.Lex(); continue;
+        default: return FMF;
+        }
+      return FMF;
+    }
+
     bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
       if (Lex.getKind() != T) {
         Present = false;
@@ -175,7 +191,9 @@ namespace llvm {
     bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
     bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
     bool ParseOptionalAddrSpace(unsigned &AddrSpace);
-    bool ParseOptionalAttrs(AttrBuilder &Attrs, unsigned AttrKind);
+    bool ParseOptionalFuncAttrs(AttrBuilder &B);
+    bool ParseOptionalParamAttrs(AttrBuilder &B);
+    bool ParseOptionalReturnAttrs(AttrBuilder &B);
     bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
     bool ParseOptionalLinkage(unsigned &Linkage) {
       bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
@@ -200,8 +218,8 @@ namespace llvm {
     bool ParseTopLevelEntities();
     bool ValidateEndOfModule();
     bool ParseTargetDefinition();
-    bool ParseDepLibs();
     bool ParseModuleAsm();
+    bool ParseDepLibs();        // FIXME: Remove in 4.0.
     bool ParseUnnamedType();
     bool ParseNamedType();
     bool ParseDeclare();
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 036686d318..2ed2c221a8 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -54,12 +54,17 @@ namespace lltok {
     kw_target,
     kw_triple,
     kw_unwind,
-    kw_deplibs,
+    kw_deplibs,                 // FIXME: Remove in 4.0
     kw_datalayout,
     kw_volatile,
     kw_atomic,
     kw_unordered, kw_monotonic, kw_acquire, kw_release, kw_acq_rel, kw_seq_cst,
     kw_singlethread,
+    kw_nnan,
+    kw_ninf,
+    kw_nsz,
+    kw_arcp,
+    kw_fast,
     kw_nuw,
     kw_nsw,
     kw_exact,
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index 21b7fd411e..a473e6b713 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -13,10 +13,10 @@
 
 #include "llvm/Assembly/Parser.h"
 #include "LLParser.h"
-#include "llvm/Module.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Module.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
 #include <cstring>
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index 448504c89e..a47c013a52 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -11,8 +11,8 @@
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include <string>
 #include <cstring>
+#include <string>
 
 using namespace llvm;
 
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 9b28c9d60a..6098c1d61c 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -13,19 +13,19 @@
 
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "BitcodeReader.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/AutoUpgrade.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
+#include "llvm/OperandTraits.h"
 #include "llvm/Operator.h"
-#include "llvm/AutoUpgrade.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/DataStream.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/OperandTraits.h"
 using namespace llvm;
 
 enum {
@@ -47,7 +47,7 @@ void BitcodeReader::FreeState() {
   ValueList.clear();
   MDValueList.clear();
 
-  std::vector<AttrListPtr>().swap(MAttributes);
+  std::vector<AttributeSet>().swap(MAttributes);
   std::vector<BasicBlock*>().swap(FunctionBBs);
   std::vector<Function*>().swap(FunctionsWithBodies);
   DeferredFunctionInfo.clear();
@@ -487,7 +487,7 @@ bool BitcodeReader::ParseAttributeBlock() {
                                                   Attributes::get(Context, B)));
       }
 
-      MAttributes.push_back(AttrListPtr::get(Context, Attrs));
+      MAttributes.push_back(AttributeSet::get(Context, Attrs));
       Attrs.clear();
       break;
     }
@@ -1573,10 +1573,11 @@ bool BitcodeReader::ParseModule(bool Resume) {
       break;
     }
     case bitc::MODULE_CODE_DEPLIB: {  // DEPLIB: [strchr x N]
+      // FIXME: Remove in 4.0.
       std::string S;
       if (ConvertToString(Record, 0, S))
         return Error("Invalid MODULE_CODE_DEPLIB record");
-      TheModule->addLibrary(S);
+      // Ignore value.
       break;
     }
     case bitc::MODULE_CODE_SECTIONNAME: {  // SECTIONNAME: [strchr x N]
@@ -2052,7 +2053,22 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
                    Opc == Instruction::AShr) {
           if (Record[OpNum] & (1 << bitc::PEO_EXACT))
             cast<BinaryOperator>(I)->setIsExact(true);
+        } else if (isa<FPMathOperator>(I)) {
+          FastMathFlags FMF;
+          if (0 != (Record[OpNum] & FastMathFlags::UnsafeAlgebra))
+            FMF.setUnsafeAlgebra();
+          if (0 != (Record[OpNum] & FastMathFlags::NoNaNs))
+            FMF.setNoNaNs();
+          if (0 != (Record[OpNum] & FastMathFlags::NoInfs))
+            FMF.setNoInfs();
+          if (0 != (Record[OpNum] & FastMathFlags::NoSignedZeros))
+            FMF.setNoSignedZeros();
+          if (0 != (Record[OpNum] & FastMathFlags::AllowReciprocal))
+            FMF.setAllowReciprocal();
+          if (FMF.any())
+            I->setFastMathFlags(FMF);
         }
+
       }
       break;
     }
@@ -2390,7 +2406,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
     case bitc::FUNC_CODE_INST_INVOKE: {
       // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...]
       if (Record.size() < 4) return Error("Invalid INVOKE record");
-      AttrListPtr PAL = getAttributes(Record[0]);
+      AttributeSet PAL = getAttributes(Record[0]);
       unsigned CCInfo = Record[1];
       BasicBlock *NormalBB = getBasicBlock(Record[2]);
       BasicBlock *UnwindBB = getBasicBlock(Record[3]);
@@ -2655,7 +2671,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
       if (Record.size() < 3)
         return Error("Invalid CALL record");
 
-      AttrListPtr PAL = getAttributes(Record[0]);
+      AttributeSet PAL = getAttributes(Record[0]);
       unsigned CCInfo = Record[1];
 
       unsigned OpNum = 2;
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index f3b516ecfd..6d27eff128 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -14,14 +14,14 @@
 #ifndef BITCODE_READER_H
 #define BITCODE_READER_H
 
-#include "llvm/GVMaterializer.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Attributes.h"
-#include "llvm/Type.h"
-#include "llvm/OperandTraits.h"
 #include "llvm/Bitcode/BitstreamReader.h"
 #include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/GVMaterializer.h"
+#include "llvm/OperandTraits.h"
 #include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/Type.h"
 #include <vector>
 
 namespace llvm {
@@ -146,7 +146,7 @@ class BitcodeReader : public GVMaterializer {
   /// MAttributes - The set of attributes by index.  Index zero in the
   /// file is for null, and is thus not represented here.  As such all indices
   /// are off by one.
-  std::vector<AttrListPtr> MAttributes;
+  std::vector<AttributeSet> MAttributes;
 
   /// FunctionBBs - While parsing a function body, this is a list of the basic
   /// blocks for the function.
@@ -246,10 +246,10 @@ private:
     if (ID >= FunctionBBs.size()) return 0; // Invalid ID
     return FunctionBBs[ID];
   }
-  AttrListPtr getAttributes(unsigned i) const {
+  AttributeSet getAttributes(unsigned i) const {
     if (i-1 < MAttributes.size())
       return MAttributes[i-1];
-    return AttrListPtr();
+    return AttributeSet();
   }
 
   /// getValueTypePair - Read a value/type pair out of the specified record from
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index cf3c9fd74e..ffe95d8f27 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -12,22 +12,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Bitcode/ReaderWriter.h"
+#include "ValueEnumerator.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Bitcode/BitstreamWriter.h"
 #include "llvm/Bitcode/LLVMBitCodes.h"
-#include "ValueEnumerator.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Operator.h"
-#include "llvm/ValueSymbolTable.h"
-#include "llvm/ADT/Triple.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Program.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ValueSymbolTable.h"
 #include <cctype>
 #include <map>
 using namespace llvm;
@@ -164,14 +164,14 @@ static void WriteStringRecord(unsigned Code, StringRef Str,
 // Emit information about parameter attributes.
 static void WriteAttributeTable(const ValueEnumerator &VE,
                                 BitstreamWriter &Stream) {
-  const std::vector<AttrListPtr> &Attrs = VE.getAttributes();
+  const std::vector<AttributeSet> &Attrs = VE.getAttributes();
   if (Attrs.empty()) return;
 
   Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
 
   SmallVector<uint64_t, 64> Record;
   for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
-    const AttrListPtr &A = Attrs[i];
+    const AttributeSet &A = Attrs[i];
     for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) {
       const AttributeWithIndex &PAWI = A.getSlot(i);
       Record.push_back(PAWI.Index);
@@ -392,10 +392,6 @@ static unsigned getEncodedThreadLocalMode(const GlobalVariable *GV) {
 // descriptors for global variables, and function prototype info.
 static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
                             BitstreamWriter &Stream) {
-  // Emit the list of dependent libraries for the Module.
-  for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
-    WriteStringRecord(bitc::MODULE_CODE_DEPLIB, *I, 0/*TODO*/, Stream);
-
   // Emit various pieces of data attached to a module.
   if (!M->getTargetTriple().empty())
     WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(),
@@ -553,6 +549,18 @@ static uint64_t GetOptimizationFlags(const Value *V) {
                dyn_cast<PossiblyExactOperator>(V)) {
     if (PEO->isExact())
       Flags |= 1 << bitc::PEO_EXACT;
+  } else if (const FPMathOperator *FPMO =
+             dyn_cast<const FPMathOperator>(V)) {
+    if (FPMO->hasUnsafeAlgebra())
+      Flags |= FastMathFlags::UnsafeAlgebra;
+    if (FPMO->hasNoNaNs())
+      Flags |= FastMathFlags::NoNaNs;
+    if (FPMO->hasNoInfs())
+      Flags |= FastMathFlags::NoInfs;
+    if (FPMO->hasNoSignedZeros())
+      Flags |= FastMathFlags::NoSignedZeros;
+    if (FPMO->hasAllowReciprocal())
+      Flags |= FastMathFlags::AllowReciprocal;
   }
 
   return Flags;
@@ -701,7 +709,7 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
 
   // Write metadata kinds
   // METADATA_KIND - [n x [id, name]]
-  SmallVector<StringRef, 4> Names;
+  SmallVector<StringRef, 8> Names;
   M->getMDKindNames(Names);
 
   if (Names.empty()) return;
@@ -1931,7 +1939,7 @@ static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer,
 /// WriteBitcodeToFile - Write the specified module to the specified output
 /// stream.
 void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
-  SmallVector<char, 1024> Buffer;
+  SmallVector<char, 0> Buffer;
   Buffer.reserve(256*1024);
 
   // If this is darwin or another generic macho target, reserve space for the
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 6c43f433b8..3c2cce0a16 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -12,15 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "ValueEnumerator.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/ValueSymbolTable.h"
 #include "llvm/Instructions.h"
+#include "llvm/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/ValueSymbolTable.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -418,7 +418,7 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
     EnumerateMetadata(V);
 }
 
-void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
+void ValueEnumerator::EnumerateAttributes(const AttributeSet &PAL) {
   if (PAL.isEmpty()) return;  // null is always 0.
   // Do a lookup.
   unsigned &Entry = AttributeMap[PAL.getRawPointer()];
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 896fc3d0c8..5aeea2001f 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -29,7 +29,7 @@ class Function;
 class Module;
 class MDNode;
 class NamedMDNode;
-class AttrListPtr;
+class AttributeSet;
 class ValueSymbolTable;
 class MDSymbolTable;
 class raw_ostream;
@@ -54,7 +54,7 @@ private:
 
   typedef DenseMap<void*, unsigned> AttributeMapType;
   AttributeMapType AttributeMap;
-  std::vector<AttrListPtr> Attributes;
+  std::vector<AttributeSet> Attributes;
 
   /// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by
   /// the "getGlobalBasicBlockID" method.
@@ -98,7 +98,7 @@ public:
   unsigned getInstructionID(const Instruction *I) const;
   void setInstructionID(const Instruction *I);
 
-  unsigned getAttributeID(const AttrListPtr &PAL) const {
+  unsigned getAttributeID(const AttributeSet &PAL) const {
     if (PAL.isEmpty()) return 0;  // Null maps to zero.
     AttributeMapType::const_iterator I = AttributeMap.find(PAL.getRawPointer());
     assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!");
@@ -121,7 +121,7 @@ public:
   const std::vector<const BasicBlock*> &getBasicBlocks() const {
     return BasicBlocks;
   }
-  const std::vector<AttrListPtr> &getAttributes() const {
+  const std::vector<AttributeSet> &getAttributes() const {
     return Attributes;
   }
 
@@ -146,7 +146,7 @@ private:
   void EnumerateValue(const Value *V);
   void EnumerateType(Type *T);
   void EnumerateOperandType(const Value *V);
-  void EnumerateAttributes(const AttrListPtr &PAL);
+  void EnumerateAttributes(const AttributeSet &PAL);
 
   void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
   void EnumerateNamedMetadata(const Module *M);
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index fb63c63f32..c65d496dba 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -8,6 +8,7 @@ add_subdirectory(Linker)
 add_subdirectory(Analysis)
 add_subdirectory(MC)
 add_subdirectory(Object)
+add_subdirectory(Option)
 add_subdirectory(DebugInfo)
 add_subdirectory(ExecutionEngine)
 add_subdirectory(Target)
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 7a1c049d52..14c13c4fef 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -20,14 +20,14 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 // If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
@@ -616,7 +616,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
   const TargetRegisterClass *SuperRC =
     TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
 
-  ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC);
+  ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(SuperRC);
   if (Order.empty()) {
     DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
     return false;
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 7067784854..6683630fba 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -18,15 +18,15 @@
 #define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
 
 #include "AntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <map>
 
 namespace llvm {
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 7cde136c5e..94754a0d35 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -14,10 +14,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "regalloc"
 #include "AllocationOrder.h"
-#include "VirtRegMap.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -25,56 +30,19 @@ using namespace llvm;
 AllocationOrder::AllocationOrder(unsigned VirtReg,
                                  const VirtRegMap &VRM,
                                  const RegisterClassInfo &RegClassInfo)
-  : Begin(0), End(0), Pos(0), RCI(RegClassInfo), OwnedBegin(false) {
-  const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
-  std::pair<unsigned, unsigned> HintPair =
-    VRM.getRegInfo().getRegAllocationHint(VirtReg);
-  const MachineRegisterInfo &MRI = VRM.getRegInfo();
-
-  // HintPair.second is a register, phys or virt.
-  Hint = HintPair.second;
-
-  // Translate to physreg, or 0 if not assigned yet.
-  if (TargetRegisterInfo::isVirtualRegister(Hint))
-    Hint = VRM.getPhys(Hint);
-
-  // The first hint pair component indicates a target-specific hint.
-  if (HintPair.first) {
-    const TargetRegisterInfo &TRI = VRM.getTargetRegInfo();
-    // The remaining allocation order may depend on the hint.
-    ArrayRef<uint16_t> Order =
-      TRI.getRawAllocationOrder(RC, HintPair.first, Hint,
-                                VRM.getMachineFunction());
-    if (Order.empty())
-      return;
-
-    // Copy the allocation order with reserved registers removed.
-    OwnedBegin = true;
-    unsigned *P = new unsigned[Order.size()];
-    Begin = P;
-    for (unsigned i = 0; i != Order.size(); ++i)
-      if (!MRI.isReserved(Order[i]))
-        *P++ = Order[i];
-    End = P;
-
-    // Target-dependent hints require resolution.
-    Hint = TRI.ResolveRegAllocHint(HintPair.first, Hint,
-                                   VRM.getMachineFunction());
-  } else {
-    // If there is no hint or just a normal hint, use the cached allocation
-    // order from RegisterClassInfo.
-    ArrayRef<unsigned> O = RCI.getOrder(RC);
-    Begin = O.begin();
-    End = O.end();
-  }
-
-  // The hint must be a valid physreg for allocation.
-  if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
-               !RC->contains(Hint) || MRI.isReserved(Hint)))
-    Hint = 0;
-}
-
-AllocationOrder::~AllocationOrder() {
-  if (OwnedBegin)
-    delete [] Begin;
+  : Pos(0) {
+  const MachineFunction &MF = VRM.getMachineFunction();
+  const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo();
+  Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg));
+  TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM);
+  rewind();
+
+  DEBUG({
+    if (!Hints.empty()) {
+      dbgs() << "hints:";
+      for (unsigned I = 0, E = Hints.size(); I != E; ++I)
+        dbgs() << ' ' << PrintReg(Hints[I], TRI);
+      dbgs() << '\n';
+    }
+  });
 }
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 0ce7e0c3b5..a5293f60a0 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -17,21 +17,21 @@
 #ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
 #define LLVM_CODEGEN_ALLOCATIONORDER_H
 
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/MC/MCRegisterInfo.h"
+
 namespace llvm {
 
 class RegisterClassInfo;
 class VirtRegMap;
 
 class AllocationOrder {
-  const unsigned *Begin;
-  const unsigned *End;
-  const unsigned *Pos;
-  const RegisterClassInfo &RCI;
-  unsigned Hint;
-  bool OwnedBegin;
-public:
+  SmallVector<MCPhysReg, 16> Hints;
+  ArrayRef<MCPhysReg> Order;
+  int Pos;
 
-  /// AllocationOrder - Create a new AllocationOrder for VirtReg.
+public:
+  /// Create a new AllocationOrder for VirtReg.
   /// @param VirtReg      Virtual register to allocate for.
   /// @param VRM          Virtual register map for function.
   /// @param RegClassInfo Information about reserved and allocatable registers.
@@ -39,32 +39,30 @@ public:
                   const VirtRegMap &VRM,
                   const RegisterClassInfo &RegClassInfo);
 
-  ~AllocationOrder();
-
-  /// next - Return the next physical register in the allocation order, or 0.
-  /// It is safe to call next again after it returned 0.
-  /// It will keep returning 0 until rewind() is called.
+  /// Return the next physical register in the allocation order, or 0.
+  /// It is safe to call next() again after it returned 0, it will keep
+  /// returning 0 until rewind() is called.
   unsigned next() {
-    // First take the hint.
-    if (!Pos) {
-      Pos = Begin;
-      if (Hint)
-        return Hint;
-    }
-    // Then look at the order from TRI.
-    while (Pos != End) {
-      unsigned Reg = *Pos++;
-      if (Reg != Hint)
+    if (Pos < 0)
+      return Hints.end()[Pos++];
+    while (Pos < int(Order.size())) {
+      unsigned Reg = Order[Pos++];
+      if (!isHint(Reg))
         return Reg;
     }
     return 0;
   }
 
-  /// rewind - Start over from the beginning.
-  void rewind() { Pos = 0; }
+  /// Start over from the beginning.
+  void rewind() { Pos = -int(Hints.size()); }
+
+  /// Return true if the last register returned from next() was a preferred register.
+  bool isHint() const { return Pos <= 0; }
 
-  /// isHint - Return true if PhysReg is a preferred register.
-  bool isHint(unsigned PhysReg) const { return PhysReg == Hint; }
+  /// Return true if PhysReg is a preferred register.
+  bool isHint(unsigned PhysReg) const {
+    return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end();
+  }
 };
 
 } // end namespace llvm
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 5162ad762e..88fffe0726 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -13,19 +13,19 @@
 
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 /// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index dec80a43f6..bcbbf2b8b9 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -12,29 +12,29 @@
 //===----------------------------------------------------------------------===//
 
 #include "DwarfException.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
 using namespace llvm;
 
 cl::opt<bool>
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 89f278b54c..0b04d3099d 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -15,8 +15,10 @@
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "DwarfDebug.h"
 #include "DwarfException.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -24,7 +26,8 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DebugInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
@@ -32,20 +35,17 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Timer.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Timer.h"
 using namespace llvm;
 
 static const char *DWARFGroupName = "DWARF Emission";
@@ -149,6 +149,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
+  OutStreamer.InitStreamer();
+
   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
   MMI->AnalyzeModule(M);
 
@@ -324,8 +326,13 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
       return;
     }
 
-    if (Align == 1 ||
-        MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
+    // Use .lcomm only if it supports user-specified alignment.
+    // Otherwise, while it would still be correct to use .lcomm in some
+    // cases (e.g. when Align == 1), the external assembler might enfore
+    // some -unknown- default alignment behavior, which could cause
+    // spurious differences between external and integrated assembler.
+    // Prefer to simply fall back to .local / .comm in this case.
+    if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
       // .lcomm _foo, 42
       OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align);
       return;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index b9aa5fc193..6ef7d2d130 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -13,19 +13,19 @@
 
 #define DEBUG_TYPE "asm-printer"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/DataLayout.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 50f0fc30a0..10a56af5d3 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -13,26 +13,26 @@
 
 #define DEBUG_TYPE "asm-printer"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/Constants.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Module.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 4d73b3c222..f1209d03cf 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -14,10 +14,10 @@
 #include "DIE.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 05e0f2fb63..6a172b981d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "DwarfAccelTable.h"
-#include "DwarfDebug.h"
 #include "DIE.h"
-#include "llvm/ADT/Twine.h"
+#include "DwarfDebug.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCStreamer.h"
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 92d1bbe4f7..daca0cb5dd 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -14,18 +14,18 @@
 #ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
 #define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__
 
-#include "llvm/ADT/StringMap.h"
+#include "DIE.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
-#include "DIE.h"
-#include <vector>
 #include <map>
+#include <vector>
 
 // The dwarf accelerator tables are an indirect hash table optimized
 // for null lookup rather than access to known data. They are output into
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 4fdd5ca252..2d7a5f32ae 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -12,31 +12,31 @@
 //===----------------------------------------------------------------------===//
 
 #include "DwarfException.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
 using namespace llvm;
 
 DwarfCFIException::DwarfCFIException(AsmPrinter *A)
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 4ca42283dd..f0ea8893ca 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -13,28 +13,28 @@
 
 #define DEBUG_TYPE "dwarfdebug"
 
-#include "DwarfAccelTable.h"
 #include "DwarfCompileUnit.h"
+#include "DwarfAccelTable.h"
 #include "DwarfDebug.h"
+#include "llvm/ADT/APFloat.h"
 #include "llvm/Constants.h"
 #include "llvm/DIBuilder.h"
+#include "llvm/DataLayout.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
 
 /// CompileUnit - Compile unit constructor.
-CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A,
+CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A,
                          DwarfDebug *DW)
-  : ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
+  : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) {
   DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
 }
 
@@ -51,6 +51,50 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) {
   return Value;
 }
 
+/// getDefaultLowerBound - Return the default lower bound for an array. If the
+/// DWARF version doesn't handle the language, return -1.
+int64_t CompileUnit::getDefaultLowerBound() const {
+  switch (Language) {
+  default:
+    break;
+
+  case dwarf::DW_LANG_C89:
+  case dwarf::DW_LANG_C99:
+  case dwarf::DW_LANG_C:
+  case dwarf::DW_LANG_C_plus_plus:
+  case dwarf::DW_LANG_ObjC:
+  case dwarf::DW_LANG_ObjC_plus_plus:
+    return 0;
+
+  case dwarf::DW_LANG_Fortran77:
+  case dwarf::DW_LANG_Fortran90:
+  case dwarf::DW_LANG_Fortran95:
+    return 1;
+
+  // The languages below have valid values only if the DWARF version >= 4.
+  case dwarf::DW_LANG_Java:
+  case dwarf::DW_LANG_Python:
+  case dwarf::DW_LANG_UPC:
+  case dwarf::DW_LANG_D:
+    if (dwarf::DWARF_VERSION >= 4)
+      return 0;
+    break;
+
+  case dwarf::DW_LANG_Ada83:
+  case dwarf::DW_LANG_Ada95:
+  case dwarf::DW_LANG_Cobol74:
+  case dwarf::DW_LANG_Cobol85:
+  case dwarf::DW_LANG_Modula2:
+  case dwarf::DW_LANG_Pascal83:
+  case dwarf::DW_LANG_PLI:
+    if (dwarf::DWARF_VERSION >= 4)
+      return 1;
+    break;
+  }
+
+  return -1;
+}
+
 /// addFlag - Add a flag that is true.
 void CompileUnit::addFlag(DIE *Die, unsigned Attribute) {
   if (!DD->useDarwinGDBCompat())
@@ -1250,22 +1294,25 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR,
                                        DIE *IndexTy) {
   DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
   addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
-  uint64_t L = SR.getLo();
-  uint64_t H = SR.getHi();
 
-  // The L value defines the lower bounds which is typically zero for C/C++. The
-  // H value is the upper bounds.  Values are 64 bit.  H - L + 1 is the size
-  // of the array. If L > H then do not emit DW_AT_lower_bound and
-  // DW_AT_upper_bound attributes. If L is zero and H is also zero then the
-  // array has one element and in such case do not emit lower bound.
+  // The LowerBound value defines the lower bounds which is typically zero for
+  // C/C++. The Count value is the number of elements.  Values are 64 bit. If
+  // Count == -1 then the array is unbounded and we do not emit
+  // DW_AT_lower_bound and DW_AT_upper_bound attributes. If LowerBound == 0 and
+  // Count == 0, then the array has zero elements in which case we do not emit
+  // an upper bound.
+  int64_t LowerBound = SR.getLo();
+  int64_t DefaultLowerBound = getDefaultLowerBound();
+  int64_t Count = SR.getCount();
+
+  if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
+    addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, LowerBound);
+
+  if (Count != -1 && Count != 0)
+    // FIXME: An unbounded array should reference the expression that defines
+    // the array.
+    addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, LowerBound + Count - 1);
 
-  if (L > H) {
-    Buffer.addChild(DW_Subrange);
-    return;
-  }
-  if (L)
-    addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
-  addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
   Buffer.addChild(DW_Subrange);
 }
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index fad9b6e066..bd63ff5f9b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -15,10 +15,10 @@
 #define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H
 
 #include "DIE.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/DebugInfo.h"
 
 namespace llvm {
 
@@ -32,9 +32,9 @@ class DbgVariable;
 /// CompileUnit - This dwarf writer support class manages information associated
 /// with a source file.
 class CompileUnit {
-  /// ID - File identifier for source.
+  /// UniqueID - a numeric ID unique among all CUs in the module
   ///
-  unsigned ID;
+  unsigned UniqueID;
 
   /// Language - The DW_AT_language of the compile unit
   ///
@@ -79,12 +79,16 @@ class CompileUnit {
   /// corresponds to the MDNode mapped with the subprogram DIE.
   DenseMap<DIE *, const MDNode *> ContainingTypeMap;
 
+  /// getLowerBoundDefault - Return the default lower bound for an array. If the
+  /// DWARF version doesn't handle the language, return -1.
+  int64_t getDefaultLowerBound() const;
+
 public:
-  CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW);
+  CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW);
   ~CompileUnit();
 
   // Accessors.
-  unsigned getID()                  const { return ID; }
+  unsigned getUniqueID()            const { return UniqueID; }
   unsigned getLanguage()            const { return Language; }
   DIE* getCUDie()                   const { return CUDie.get(); }
   const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; }
@@ -174,7 +178,6 @@ public:
   void setIndexTyDie(DIE *D) {
     IndexTyDie = D;
   }
-public:
 
   /// addFlag - Add a flag that is true to the DIE.
   void addFlag(DIE *Die, unsigned Attribute);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1a9deb6641..ad18024559 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -16,34 +16,34 @@
 #include "DIE.h"
 #include "DwarfAccelTable.h"
 #include "DwarfCompileUnit.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
 #include "llvm/DIBuilder.h"
-#include "llvm/Module.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DebugInfo.h"
 #include "llvm/Instructions.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/Timer.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
@@ -78,8 +78,8 @@ static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden,
                 clEnumValEnd),
      cl::init(Default));
 
-static cl::opt<DefaultOnOff> DwarfFission("dwarf-fission", cl::Hidden,
-     cl::desc("Output prototype dwarf fission."),
+static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden,
+     cl::desc("Output prototype dwarf split debug info."),
      cl::values(
                 clEnumVal(Default, "Default for platform"),
                 clEnumVal(Enable, "Enabled"),
@@ -94,8 +94,8 @@ namespace {
 
 //===----------------------------------------------------------------------===//
 
-/// Configuration values for initial hash set sizes (log2).
-///
+// Configuration values for initial hash set sizes (log2).
+//
 static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
 
 namespace llvm {
@@ -156,7 +156,9 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   : Asm(A), MMI(Asm->MMI), FirstCU(0),
     AbbreviationsSet(InitAbbreviationsSetSize),
     SourceIdMap(DIEValueAllocator), StringPool(DIEValueAllocator),
-    PrevLabel(NULL) {
+    PrevLabel(NULL), GlobalCUIndexCount(0),
+    InfoHolder(A, &AbbreviationsSet, &Abbreviations),
+    SkeletonCU(0), SkeletonHolder(A, &AbbreviationsSet, &Abbreviations) {
   NextStringPoolNumber = 0;
 
   DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
@@ -183,10 +185,10 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
   } else
     HasDwarfAccelTables = DwarfAccelTables == Enable ? true : false;
 
-  if (DwarfFission == Default)
-    HasDwarfFission = false;
+  if (SplitDwarf == Default)
+    HasSplitDwarf = false;
   else
-    HasDwarfFission = DwarfFission == Enable ? true : false;
+    HasSplitDwarf = SplitDwarf == Enable ? true : false;
 
   {
     NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
@@ -196,8 +198,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
 DwarfDebug::~DwarfDebug() {
 }
 
-/// emitSectionSym - Switch to the specified MCSection and emit an assembler
-/// temporary label to it if SymbolStem is specified.
+// Switch to the specified MCSection and emit an assembler
+// temporary label to it if SymbolStem is specified.
 static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
                                 const char *SymbolStem = 0) {
   Asm->OutStreamer.SwitchSection(Section);
@@ -220,32 +222,32 @@ MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
   return Entry.first = Asm->GetTempSymbol("string", Entry.second);
 }
 
-/// assignAbbrevNumber - Define a unique number for the abbreviation.
-///
-void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+// Define a unique number for the abbreviation.
+//
+void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) {
   // Profile the node so that we can make it unique.
   FoldingSetNodeID ID;
   Abbrev.Profile(ID);
 
   // Check the set for priors.
-  DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+  DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev);
 
   // If it's newly added.
   if (InSet == &Abbrev) {
     // Add to abbreviation list.
-    Abbreviations.push_back(&Abbrev);
+    Abbreviations->push_back(&Abbrev);
 
     // Assign the vector position + 1 as its number.
-    Abbrev.setNumber(Abbreviations.size());
+    Abbrev.setNumber(Abbreviations->size());
   } else {
     // Assign existing abbreviation number.
     Abbrev.setNumber(InSet->getNumber());
   }
 }
 
-/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
-/// printer to not emit usual symbol prefix before the symbol name is used then
-/// return linkage name after skipping this special LLVM prefix.
+// If special LLVM prefix that is used to inform the asm
+// printer to not emit usual symbol prefix before the symbol name is used then
+// return linkage name after skipping this special LLVM prefix.
 static StringRef getRealLinkageName(StringRef LinkageName) {
   char One = '\1';
   if (LinkageName.startswith(StringRef(&One, 1)))
@@ -310,10 +312,9 @@ static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP,
   }
 }
 
-/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
-/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
-/// If there are global variables in this scope then create and insert
-/// DIEs for these variables.
+// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
+// and DW_AT_high_pc attributes. If there are global variables in this
+// scope then create and insert DIEs for these variables.
 DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
                                           const MDNode *SPNode) {
   DIE *SPDie = SPCU->getDIE(SPNode);
@@ -383,8 +384,8 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU,
   return SPDie;
 }
 
-/// constructLexicalScope - Construct new DW_TAG_lexical_block
-/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+// Construct new DW_TAG_lexical_block for this scope and attach
+// DW_AT_low_pc/DW_AT_high_pc labels.
 DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
                                           LexicalScope *Scope) {
   DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
@@ -427,9 +428,8 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
   return ScopeDIE;
 }
 
-/// constructInlinedScopeDIE - This scope represents inlined body of
-/// a function. Construct DIE to represent this concrete inlined copy
-/// of the function.
+// This scope represents inlined body of a function. Construct DIE to
+// represent this concrete inlined copy of the function.
 DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
                                           LexicalScope *Scope) {
   const SmallVector<InsnRange, 4> &Ranges = Scope->getRanges();
@@ -511,7 +511,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
   return ScopeDIE;
 }
 
-/// constructScopeDIE - Construct a DIE for this scope.
+// Construct a DIE for this scope.
 DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
   if (!Scope || !Scope->getScopeNode())
     return NULL;
@@ -580,13 +580,12 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
   return ScopeDIE;
 }
 
-/// getOrCreateSourceID - Look up the source id with the given directory and
-/// source file names. If none currently exists, create a new id and insert it
-/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
-/// maps as well.
+// Look up the source id with the given directory and source file names.
+// If none currently exists, create a new id and insert it in the
+// SourceIds map. This can update DirectoryNames and SourceFileNames maps
+// as well.
 unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
-                                         StringRef DirName,
-                                         StringRef Extra) { // @LOCALMOD
+                                         StringRef DirName) {
   // If FE did not provide a file name, then assume stdin.
   if (FileName.empty())
     return getOrCreateSourceID("<stdin>", StringRef());
@@ -602,9 +601,6 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
   NamePair += DirName;
   NamePair += '\0'; // Zero bytes are not allowed in paths.
   NamePair += FileName;
-  // @LOCALMOD
-  NamePair += '\0'; // Zero bytes are not allowed in paths.
-  NamePair += Extra;
 
   StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId);
   if (Ent.getValue() != SrcId)
@@ -616,47 +612,18 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName,
   return SrcId;
 }
 
-// @LOCALMOD-BEGIN
-// A special version of GetOrCreateSourceID for CompileUnits.
-// It is possible that with bitcode linking, we end up with distinct
-// compile units based on the same source file.
-// E.g., compile foo.c with -DMACRO1 to foo1.bc, then compile
-// foo.c again with -DMACRO2 to foo2.bc and link.
-// We use additional information to form a unique ID in that case.
-unsigned DwarfDebug::getOrCreateCompileUnitID(StringRef Filename,
-                                              StringRef Dirname,
-                                              const MDNode *N) {
-  std::string DIUnitStr;
-  raw_string_ostream ostr(DIUnitStr);
-
-  // Using information from the compile unit (N)'s getEnumTypes(),
-  // getRetainedTypes(), getSubprograms(), getGlobalVariables()
-  // could be pretty expensive.
-  // Cheat and use the MDNode's address as an additional identifying factor.
-  // constructCompileUnit() is only called once per compile unit.
-  ostr << static_cast<const void*>(N);
-  return getOrCreateSourceID(Filename, Dirname, ostr.str());
-}
-// @LOCALMOD-END
-
-/// constructCompileUnit - Create new CompileUnit for the given
-/// metadata node with tag DW_TAG_compile_unit.
+// Create new CompileUnit for the given metadata node with tag DW_TAG_compile_unit.
 CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
   DICompileUnit DIUnit(N);
   StringRef FN = DIUnit.getFilename();
   CompilationDir = DIUnit.getDirectory();
-  // @LOCALMOD-BEGIN
-  unsigned ID;
-  if (Triple(Asm->TM.getTargetTriple()).isOSNaCl()) {
-    ID = getOrCreateCompileUnitID(FN, CompilationDir, N);
-  } else {
-    ID = getOrCreateSourceID(FN, CompilationDir);
-  }
-  // @LOCALMOD-END
+  // Call this to emit a .file directive if it wasn't emitted for the source
+  // file this CU comes from yet.
+  getOrCreateSourceID(FN, CompilationDir);
 
   DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
-  CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die,
-                                       Asm, this);
+  CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
+                                       DIUnit.getLanguage(), Die, Asm, this);
   NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
   NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
                  DIUnit.getLanguage());
@@ -687,11 +654,16 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
 
   if (!FirstCU)
     FirstCU = NewCU;
+  if (useSplitDwarf() && !SkeletonCU)
+    SkeletonCU = constructSkeletonCU(N);
+
+  InfoHolder.addUnit(NewCU);
+
   CUMap.insert(std::make_pair(N, NewCU));
   return NewCU;
 }
 
-/// construct SubprogramDIE - Construct subprogram DIE.
+// Construct subprogram DIE.
 void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
                                         const MDNode *N) {
   CompileUnit *&CURef = SPMap[N];
@@ -716,8 +688,7 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU,
   return;
 }
 
-/// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such
-/// as llvm.dbg.enum and llvm.dbg.ty
+// Collect debug info from named mdnodes such as llvm.dbg.enum and llvm.dbg.ty.
 void DwarfDebug::collectInfoFromNamedMDNodes(const Module *M) {
   if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"))
     for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
@@ -748,8 +719,8 @@ void DwarfDebug::collectInfoFromNamedMDNodes(const Module *M) {
     }
 }
 
-/// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
-/// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder.
+// Collect debug info using DebugInfoFinder.
+// FIXME - Remove this when dragonegg switches to DIBuilder.
 bool DwarfDebug::collectLegacyDebugInfo(const Module *M) {
   DebugInfoFinder DbgFinder;
   DbgFinder.processModule(*M);
@@ -770,6 +741,7 @@ bool DwarfDebug::collectLegacyDebugInfo(const Module *M) {
   for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
          E = DbgFinder.compile_unit_end(); I != E; ++I)
     constructCompileUnit(*I);
+
   // Create DIEs for each global variable.
   for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
          E = DbgFinder.global_variable_end(); I != E; ++I) {
@@ -789,9 +761,9 @@ bool DwarfDebug::collectLegacyDebugInfo(const Module *M) {
   return HasDebugInfo;
 }
 
-/// beginModule - Emit all Dwarf sections that should come prior to the
-/// content. Create global DIEs and emit initial debug info sections.
-/// This is invoked by the target AsmPrinter.
+// Emit all Dwarf sections that should come prior to the content. Create
+// global DIEs and emit initial debug info sections. This is invoked by
+// the target AsmPrinter.
 void DwarfDebug::beginModule() {
   if (DisableDebugInfoPrinting)
     return;
@@ -834,12 +806,12 @@ void DwarfDebug::beginModule() {
 void DwarfDebug::computeInlinedDIEs() {
   // Attach DW_AT_inline attribute with inlined subprogram DIEs.
   for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
-	 AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+         AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
     DIE *ISP = *AI;
     FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
   }
   for (DenseMap<const MDNode *, DIE *>::iterator AI = AbstractSPDies.begin(),
-	 AE = AbstractSPDies.end(); AI != AE; ++AI) {
+         AE = AbstractSPDies.end(); AI != AE; ++AI) {
     DIE *ISP = AI->second;
     if (InlinedSubprogramDIEs.count(ISP))
       continue;
@@ -857,30 +829,30 @@ void DwarfDebug::collectDeadVariables() {
       DICompileUnit TheCU(CU_Nodes->getOperand(i));
       DIArray Subprograms = TheCU.getSubprograms();
       for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
-	DISubprogram SP(Subprograms.getElement(i));
-	if (ProcessedSPNodes.count(SP) != 0) continue;
-	if (!SP.Verify()) continue;
-	if (!SP.isDefinition()) continue;
-	DIArray Variables = SP.getVariables();
-	if (Variables.getNumElements() == 0) continue;
-
-	LexicalScope *Scope =
-	  new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
-	DeadFnScopeMap[SP] = Scope;
-
-	// Construct subprogram DIE and add variables DIEs.
-	CompileUnit *SPCU = CUMap.lookup(TheCU);
-	assert(SPCU && "Unable to find Compile Unit!");
-	constructSubprogramDIE(SPCU, SP);
-	DIE *ScopeDIE = SPCU->getDIE(SP);
-	for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
-	  DIVariable DV(Variables.getElement(vi));
-	  if (!DV.Verify()) continue;
-	  DbgVariable *NewVar = new DbgVariable(DV, NULL);
-	  if (DIE *VariableDIE =
-	      SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope()))
-	    ScopeDIE->addChild(VariableDIE);
-	}
+        DISubprogram SP(Subprograms.getElement(i));
+        if (ProcessedSPNodes.count(SP) != 0) continue;
+        if (!SP.Verify()) continue;
+        if (!SP.isDefinition()) continue;
+        DIArray Variables = SP.getVariables();
+        if (Variables.getNumElements() == 0) continue;
+
+        LexicalScope *Scope =
+          new LexicalScope(NULL, DIDescriptor(SP), NULL, false);
+        DeadFnScopeMap[SP] = Scope;
+
+        // Construct subprogram DIE and add variables DIEs.
+        CompileUnit *SPCU = CUMap.lookup(TheCU);
+        assert(SPCU && "Unable to find Compile Unit!");
+        constructSubprogramDIE(SPCU, SP);
+        DIE *ScopeDIE = SPCU->getDIE(SP);
+        for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
+          DIVariable DV(Variables.getElement(vi));
+          if (!DV.Verify()) continue;
+          DbgVariable *NewVar = new DbgVariable(DV, NULL);
+          if (DIE *VariableDIE =
+              SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope()))
+            ScopeDIE->addChild(VariableDIE);
+        }
       }
     }
   }
@@ -897,13 +869,15 @@ void DwarfDebug::finalizeModuleInfo() {
   // Emit DW_AT_containing_type attribute to connect types with their
   // vtable holding type.
   for (DenseMap<const MDNode *, CompileUnit *>::iterator CUI = CUMap.begin(),
-	 CUE = CUMap.end(); CUI != CUE; ++CUI) {
+         CUE = CUMap.end(); CUI != CUE; ++CUI) {
     CompileUnit *TheCU = CUI->second;
     TheCU->constructContainingTypeDIEs();
   }
 
    // Compute DIE offsets and sizes.
-  computeSizeAndOffsets();
+  InfoHolder.computeSizeAndOffsets();
+  if (useSplitDwarf())
+    SkeletonHolder.computeSizeAndOffsets();
 }
 
 void DwarfDebug::endSections() {
@@ -920,8 +894,7 @@ void DwarfDebug::endSections() {
   }
 }
 
-/// endModule - Emit all Dwarf sections that should come after the content.
-///
+// Emit all Dwarf sections that should come after the content.
 void DwarfDebug::endModule() {
 
   if (!FirstCU) return;
@@ -936,11 +909,61 @@ void DwarfDebug::endModule() {
   // Emit initial sections.
   emitSectionLabels();
 
-  // Emit all the DIEs into a debug info section
-  emitDebugInfo();
+  if (!useSplitDwarf()) {
+    // Emit all the DIEs into a debug info section.
+    emitDebugInfo();
+
+    // Corresponding abbreviations into a abbrev section.
+    emitAbbreviations();
+
+    // Emit info into a debug loc section.
+    emitDebugLoc();
+
+    // Emit info into a debug aranges section.
+    emitDebugARanges();
+
+    // Emit info into a debug ranges section.
+    emitDebugRanges();
 
-  // Corresponding abbreviations into a abbrev section.
-  emitAbbreviations();
+    // Emit info into a debug macinfo section.
+    emitDebugMacInfo();
+
+    // Emit inline info.
+    // TODO: When we don't need the option anymore we
+    // can remove all of the code that this section
+    // depends upon.
+    if (useDarwinGDBCompat())
+      emitDebugInlineInfo();
+  } else {
+    // TODO: Fill this in for Fission sections and separate
+    // out information into new sections.
+
+    // Emit the debug info section and compile units.
+    emitDebugInfo();
+    emitDebugInfoDWO();
+
+    // Corresponding abbreviations into a abbrev section.
+    emitAbbreviations();
+
+    // Emit info into a debug loc section.
+    emitDebugLoc();
+
+    // Emit info into a debug aranges section.
+    emitDebugARanges();
+
+    // Emit info into a debug ranges section.
+    emitDebugRanges();
+
+    // Emit info into a debug macinfo section.
+    emitDebugMacInfo();
+
+    // Emit inline info.
+    // TODO: When we don't need the option anymore we
+    // can remove all of the code that this section
+    // depends upon.
+    if (useDarwinGDBCompat())
+      emitDebugInlineInfo();
+  }
 
   // Emit info into the dwarf accelerator table sections.
   if (useDwarfAccelTables()) {
@@ -956,26 +979,7 @@ void DwarfDebug::endModule() {
   if (useDarwinGDBCompat())
     emitDebugPubTypes();
 
-  // Emit info into a debug loc section.
-  emitDebugLoc();
-
-  // Emit info into a debug aranges section.
-  emitDebugARanges();
-
-  // Emit info into a debug ranges section.
-  emitDebugRanges();
-
-  // Emit info into a debug macinfo section.
-  emitDebugMacInfo();
-
-  // Emit inline info.
-  // TODO: When we don't need the option anymore we
-  // can remove all of the code that this section
-  // depends upon.
-  if (useDarwinGDBCompat())
-    emitDebugInlineInfo();
-
-  // Emit info into a debug str section.
+  // Finally emit string information into a string table.
   emitDebugStr();
 
   // clean up.
@@ -983,10 +987,15 @@ void DwarfDebug::endModule() {
   for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
          E = CUMap.end(); I != E; ++I)
     delete I->second;
-  FirstCU = NULL;  // Reset for the next Module, if any.
+
+  delete SkeletonCU;
+
+  // Reset these for the next Module if we have one.
+  FirstCU = NULL;
+  SkeletonCU = NULL;
 }
 
-/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+// Find abstract variable, if any, associated with Var.
 DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
                                               DebugLoc ScopeLoc) {
   LLVMContext &Ctx = DV->getContext();
@@ -1006,8 +1015,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
   return AbsDbgVariable;
 }
 
-/// addCurrentFnArgument - If Var is a current function argument then add
-/// it to CurrentFnArguments list.
+// If Var is a current function argument then add it to CurrentFnArguments list.
 bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
                                       DbgVariable *Var, LexicalScope *Scope) {
   if (!LScopes.isCurrentFunctionScope(Scope))
@@ -1030,8 +1038,7 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
   return true;
 }
 
-/// collectVariableInfoFromMMITable - Collect variable information from
-/// side table maintained by MMI.
+// Collect variable information from side table maintained by MMI.
 void
 DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
                                    SmallPtrSet<const MDNode *, 16> &Processed) {
@@ -1060,8 +1067,8 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
   }
 }
 
-/// isDbgValueInDefinedReg - Return true if debug value, encoded by
-/// DBG_VALUE instruction, is in a defined reg.
+// Return true if debug value, encoded by DBG_VALUE instruction, is in a
+// defined reg.
 static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
   assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
   return MI->getNumOperands() == 3 &&
@@ -1069,8 +1076,7 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
          MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
 }
 
-/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting
-/// at MI.
+// Get .debug_loc entry for the instruction range starting at MI.
 static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
                                          const MCSymbol *FLabel,
                                          const MCSymbol *SLabel,
@@ -1096,12 +1102,12 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
   llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
 }
 
-/// collectVariableInfo - Find variables for each lexical scope.
+// Find variables for each lexical scope.
 void
 DwarfDebug::collectVariableInfo(const MachineFunction *MF,
                                 SmallPtrSet<const MDNode *, 16> &Processed) {
 
-  /// collection info from MMI table.
+  // collection info from MMI table.
   collectVariableInfoFromMMITable(MF, Processed);
 
   for (SmallVectorImpl<const MDNode*>::const_iterator
@@ -1207,19 +1213,19 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
   }
 }
 
-/// getLabelBeforeInsn - Return Label preceding the instruction.
+// Return Label preceding the instruction.
 const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
   MCSymbol *Label = LabelsBeforeInsn.lookup(MI);
   assert(Label && "Didn't insert label before instruction");
   return Label;
 }
 
-/// getLabelAfterInsn - Return Label immediately following the instruction.
+// Return Label immediately following the instruction.
 const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
   return LabelsAfterInsn.lookup(MI);
 }
 
-/// beginInstruction - Process beginning of an instruction.
+// Process beginning of an instruction.
 void DwarfDebug::beginInstruction(const MachineInstr *MI) {
   // Check if source location changes, but ignore DBG_VALUE locations.
   if (!MI->isDebugValue()) {
@@ -1261,7 +1267,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
   I->second = PrevLabel;
 }
 
-/// endInstruction - Process end of an instruction.
+// Process end of an instruction.
 void DwarfDebug::endInstruction(const MachineInstr *MI) {
   // Don't create a new label after DBG_VALUE instructions.
   // They don't generate code.
@@ -1287,11 +1293,10 @@ void DwarfDebug::endInstruction(const MachineInstr *MI) {
   I->second = PrevLabel;
 }
 
-/// identifyScopeMarkers() -
-/// Each LexicalScope has first instruction and last instruction to mark
-/// beginning and end of a scope respectively. Create an inverse map that list
-/// scopes starts (and ends) with an instruction. One instruction may start (or
-/// end) multiple scopes. Ignore scopes that are not reachable.
+// Each LexicalScope has first instruction and last instruction to mark
+// beginning and end of a scope respectively. Create an inverse map that list
+// scopes starts (and ends) with an instruction. One instruction may start (or
+// end) multiple scopes. Ignore scopes that are not reachable.
 void DwarfDebug::identifyScopeMarkers() {
   SmallVector<LexicalScope *, 4> WorkList;
   WorkList.push_back(LScopes.getCurrentFunctionScope());
@@ -1320,15 +1325,15 @@ void DwarfDebug::identifyScopeMarkers() {
   }
 }
 
-/// getScopeNode - Get MDNode for DebugLoc's scope.
+// Get MDNode for DebugLoc's scope.
 static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) {
   if (MDNode *InlinedAt = DL.getInlinedAt(Ctx))
     return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx);
   return DL.getScope(Ctx);
 }
 
-/// getFnDebugLoc - Walk up the scope chain of given debug loc and find
-/// line number info for the function.
+// Walk up the scope chain of given debug loc and find line number info
+// for the function.
 static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
   const MDNode *Scope = getScopeNode(DL, Ctx);
   DISubprogram SP = getDISubprogram(Scope);
@@ -1344,8 +1349,8 @@ static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) {
   return DebugLoc();
 }
 
-/// beginFunction - Gather pre-function debug information.  Assumes being
-/// emitted immediately after the function entry point.
+// Gather pre-function debug information.  Assumes being called immediately
+// after the function entry point has been emitted.
 void DwarfDebug::beginFunction(const MachineFunction *MF) {
   if (!MMI->hasDebugInfo()) return;
   LScopes.initialize(*MF);
@@ -1360,7 +1365,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
 
   const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
-  /// LiveUserVar - Map physreg numbers to the MDNode they contain.
+  // LiveUserVar - Map physreg numbers to the MDNode they contain.
   std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
 
   for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
@@ -1515,7 +1520,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
                                        MF->getFunction()->getContext());
     recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(),
                      FnStartDL.getScope(MF->getFunction()->getContext()),
-                     0);
+    // We'd like to list the prologue as "not statements" but GDB behaves
+    // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
+                     DWARF2_FLAG_IS_STMT);
   }
 }
 
@@ -1525,8 +1532,7 @@ void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) {
 //  Vars.push_back(Var);
 }
 
-/// endFunction - Gather and emit post-function debug information.
-///
+// Gather and emit post-function debug information.
 void DwarfDebug::endFunction(const MachineFunction *MF) {
   if (!MMI->hasDebugInfo() || LScopes.empty()) return;
 
@@ -1591,9 +1597,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
   PrevLabel = NULL;
 }
 
-/// recordSourceLine - Register a source line with debug info. Returns the
-/// unique label that was emitted and which provides correspondence to
-/// the source line list.
+// Register a source line with debug info. Returns the  unique label that was
+// emitted and which provides correspondence to the source line list.
 void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
                                   unsigned Flags) {
   StringRef Fn;
@@ -1634,10 +1639,9 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
 // Emit Methods
 //===----------------------------------------------------------------------===//
 
-/// computeSizeAndOffset - Compute the size and offset of a DIE.
-///
+// Compute the size and offset of a DIE.
 unsigned
-DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset) {
+DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) {
   // Get the children.
   const std::vector<DIE *> &Children = Die->getChildren();
 
@@ -1646,7 +1650,7 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset) {
 
   // Get the abbreviation for this DIE.
   unsigned AbbrevNumber = Die->getAbbrevNumber();
-  const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+  const DIEAbbrev *Abbrev = Abbreviations->at(AbbrevNumber - 1);
 
   // Set DIE offset
   Die->setOffset(Offset);
@@ -1678,23 +1682,21 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset) {
   return Offset;
 }
 
-/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
-///
-void DwarfDebug::computeSizeAndOffsets() {
-  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
-         E = CUMap.end(); I != E; ++I) {
-    // Compute size of compile unit header.
+// Compute the size and offset of all the DIEs.
+void DwarfUnits::computeSizeAndOffsets() {
+  for (SmallVector<CompileUnit *, 1>::iterator I = CUs.begin(),
+         E = CUs.end(); I != E; ++I) {
     unsigned Offset =
       sizeof(int32_t) + // Length of Compilation Unit Info
       sizeof(int16_t) + // DWARF version number
       sizeof(int32_t) + // Offset Into Abbrev. Section
       sizeof(int8_t);   // Pointer Size (in bytes)
-    computeSizeAndOffset(I->second->getCUDie(), Offset);
+
+    computeSizeAndOffset((*I)->getCUDie(), Offset);
   }
 }
 
-/// emitSectionLabels - Emit initial Dwarf sections with a label at
-/// the start of each one.
+// Emit initial Dwarf sections with a label at the start of each one.
 void DwarfDebug::emitSectionLabels() {
   const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
 
@@ -1723,8 +1725,7 @@ void DwarfDebug::emitSectionLabels() {
   emitSectionSym(Asm, TLOF.getDataSection());
 }
 
-/// emitDIE - Recursively emits a debug information entry.
-///
+// Recursively emits a debug information entry.
 void DwarfDebug::emitDIE(DIE *Die) {
   // Get the abbreviation for this DIE.
   unsigned AbbrevNumber = Die->getAbbrevNumber();
@@ -1813,12 +1814,8 @@ void DwarfDebug::emitDIE(DIE *Die) {
   }
 }
 
-/// emitDebugInfo - Emit the debug info section.
-///
-void DwarfDebug::emitDebugInfo() {
-  // Start debug info section.
-  Asm->OutStreamer.SwitchSection(
-                            Asm->getObjFileLowering().getDwarfInfoSection());
+void DwarfDebug::emitCompileUnits(const MCSection *Section) {
+  Asm->OutStreamer.SwitchSection(Section);
   for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
          E = CUMap.end(); I != E; ++I) {
     CompileUnit *TheCU = I->second;
@@ -1826,7 +1823,7 @@ void DwarfDebug::emitDebugInfo() {
 
     // Emit the compile units header.
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
-                                                  TheCU->getID()));
+                                                  TheCU->getUniqueID()));
 
     // Emit size of content not including length itself
     unsigned ContentSize = Die->getSize() +
@@ -1845,12 +1842,20 @@ void DwarfDebug::emitDebugInfo() {
     Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
 
     emitDIE(Die);
-    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end",
+                                                  TheCU->getUniqueID()));
   }
 }
 
-/// emitAbbreviations - Emit the abbreviation section.
-///
+// Emit the debug info section.
+void DwarfDebug::emitDebugInfo() {
+  if (!useSplitDwarf())
+    emitCompileUnits(Asm->getObjFileLowering().getDwarfInfoSection());
+  else
+    emitSkeletonCU(Asm->getObjFileLowering().getDwarfInfoSection());
+}
+
+// Emit the abbreviation section.
 void DwarfDebug::emitAbbreviations() {
   // Check to see if it is worth the effort.
   if (!Abbreviations.empty()) {
@@ -1879,9 +1884,7 @@ void DwarfDebug::emitAbbreviations() {
   }
 }
 
-/// emitEndOfLineMatrix - Emit the last address of the section and the end of
-/// the line matrix.
-///
+// Emit the last address of the section and the end of the line matrix.
 void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
   // Define last address of section.
   Asm->OutStreamer.AddComment("Extended Op");
@@ -1905,8 +1908,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
   Asm->EmitInt8(1);
 }
 
-/// emitAccelNames - Emit visible names into a hashed accelerator table
-/// section.
+// Emit visible names into a hashed accelerator table section.
 void DwarfDebug::emitAccelNames() {
   DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
                                            dwarf::DW_FORM_data4));
@@ -1934,8 +1936,7 @@ void DwarfDebug::emitAccelNames() {
   AT.Emit(Asm, SectionBegin, this);
 }
 
-/// emitAccelObjC - Emit objective C classes and categories into a hashed
-/// accelerator table section.
+// Emit objective C classes and categories into a hashed accelerator table section.
 void DwarfDebug::emitAccelObjC() {
   DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
                                            dwarf::DW_FORM_data4));
@@ -1963,8 +1964,7 @@ void DwarfDebug::emitAccelObjC() {
   AT.Emit(Asm, SectionBegin, this);
 }
 
-/// emitAccelNamespace - Emit namespace dies into a hashed accelerator
-/// table.
+// Emit namespace dies into a hashed accelerator table.
 void DwarfDebug::emitAccelNamespaces() {
   DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
                                            dwarf::DW_FORM_data4));
@@ -1992,7 +1992,7 @@ void DwarfDebug::emitAccelNamespaces() {
   AT.Emit(Asm, SectionBegin, this);
 }
 
-/// emitAccelTypes() - Emit type dies into a hashed accelerator table.
+// Emit type dies into a hashed accelerator table.
 void DwarfDebug::emitAccelTypes() {
   std::vector<DwarfAccelTable::Atom> Atoms;
   Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset,
@@ -2036,22 +2036,25 @@ void DwarfDebug::emitDebugPubTypes() {
       Asm->getObjFileLowering().getDwarfPubTypesSection());
     Asm->OutStreamer.AddComment("Length of Public Types Info");
     Asm->EmitLabelDifference(
-      Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
-      Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
+      Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()),
+      Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4);
 
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
-                                                  TheCU->getID()));
+                                                  TheCU->getUniqueID()));
 
     if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
     Asm->EmitInt16(dwarf::DWARF_VERSION);
 
     Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
-    Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+    Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin",
+                                              TheCU->getUniqueID()),
                            DwarfInfoSectionSym);
 
     Asm->OutStreamer.AddComment("Compilation Unit Length");
-    Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
-                             Asm->GetTempSymbol("info_begin", TheCU->getID()),
+    Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end",
+                                                TheCU->getUniqueID()),
+                             Asm->GetTempSymbol("info_begin",
+                                                TheCU->getUniqueID()),
                              4);
 
     const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
@@ -2071,12 +2074,11 @@ void DwarfDebug::emitDebugPubTypes() {
     Asm->OutStreamer.AddComment("End Mark");
     Asm->EmitInt32(0);
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
-                                                  TheCU->getID()));
+                                                  TheCU->getUniqueID()));
   }
 }
 
-/// emitDebugStr - Emit visible names into a debug str section.
-///
+// Emit visible names into a debug str section.
 void DwarfDebug::emitDebugStr() {
   // Check to see if it is worth the effort.
   if (StringPool.empty()) return;
@@ -2107,8 +2109,7 @@ void DwarfDebug::emitDebugStr() {
   }
 }
 
-/// emitDebugLoc - Emit visible names into a debug loc section.
-///
+// Emit visible names into a debug loc section.
 void DwarfDebug::emitDebugLoc() {
   if (DotDebugLocEntries.empty())
     return;
@@ -2207,16 +2208,14 @@ void DwarfDebug::emitDebugLoc() {
   }
 }
 
-/// emitDebugARanges - Emit visible names into a debug aranges section.
-///
+// Emit visible names into a debug aranges section.
 void DwarfDebug::emitDebugARanges() {
   // Start the dwarf aranges section.
   Asm->OutStreamer.SwitchSection(
                           Asm->getObjFileLowering().getDwarfARangesSection());
 }
 
-/// emitDebugRanges - Emit visible names into a debug ranges section.
-///
+// Emit visible names into a debug ranges section.
 void DwarfDebug::emitDebugRanges() {
   // Start the dwarf ranges section.
   Asm->OutStreamer.SwitchSection(
@@ -2232,8 +2231,7 @@ void DwarfDebug::emitDebugRanges() {
   }
 }
 
-/// emitDebugMacInfo - Emit visible names into a debug macinfo section.
-///
+// Emit visible names into a debug macinfo section.
 void DwarfDebug::emitDebugMacInfo() {
   if (const MCSection *LineInfo =
       Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
@@ -2242,24 +2240,24 @@ void DwarfDebug::emitDebugMacInfo() {
   }
 }
 
-/// emitDebugInlineInfo - Emit inline info using following format.
-/// Section Header:
-/// 1. length of section
-/// 2. Dwarf version number
-/// 3. address size.
-///
-/// Entries (one "entry" for each function that was inlined):
-///
-/// 1. offset into __debug_str section for MIPS linkage name, if exists;
-///   otherwise offset into __debug_str for regular function name.
-/// 2. offset into __debug_str section for regular function name.
-/// 3. an unsigned LEB128 number indicating the number of distinct inlining
-/// instances for the function.
-///
-/// The rest of the entry consists of a {die_offset, low_pc} pair for each
-/// inlined instance; the die_offset points to the inlined_subroutine die in the
-/// __debug_info section, and the low_pc is the starting address for the
-/// inlining instance.
+// Emit inline info using following format.
+// Section Header:
+// 1. length of section
+// 2. Dwarf version number
+// 3. address size.
+//
+// Entries (one "entry" for each function that was inlined):
+//
+// 1. offset into __debug_str section for MIPS linkage name, if exists;
+//   otherwise offset into __debug_str for regular function name.
+// 2. offset into __debug_str section for regular function name.
+// 3. an unsigned LEB128 number indicating the number of distinct inlining
+// instances for the function.
+//
+// The rest of the entry consists of a {die_offset, low_pc} pair for each
+// inlined instance; the die_offset points to the inlined_subroutine die in the
+// __debug_info section, and the low_pc is the starting address for the
+// inlining instance.
 void DwarfDebug::emitDebugInlineInfo() {
   if (!Asm->MAI->doesDwarfUseInlineInfoSection())
     return;
@@ -2316,3 +2314,79 @@ void DwarfDebug::emitDebugInlineInfo() {
 
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1));
 }
+
+// DWARF5 Experimental Fission emitters.
+
+// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
+// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
+// DW_AT_ranges_base, DW_AT_addr_base. If DW_AT_ranges is present,
+// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa.
+CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) {
+  DICompileUnit DIUnit(N);
+  StringRef FN = DIUnit.getFilename();
+  CompilationDir = DIUnit.getDirectory();
+
+  DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+  CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++,
+                                       DIUnit.getLanguage(), Die, Asm, this);
+  // FIXME: This should be the .dwo file.
+  NewCU->addString(Die, dwarf::DW_AT_GNU_dwo_name, FN);
+
+  // FIXME: We also need DW_AT_addr_base and DW_AT_dwo_id.
+
+  // 2.17.1 requires that we use DW_AT_low_pc for a single entry point
+  // into an entity.
+  NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
+  // DW_AT_stmt_list is a offset of line number information for this
+  // compile unit in debug_line section.
+  if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
+    NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
+                    Asm->GetTempSymbol("section_line"));
+  else
+    NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+
+  if (!CompilationDir.empty())
+    NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+
+  SkeletonHolder.addUnit(NewCU);
+
+  return NewCU;
+}
+
+void DwarfDebug::emitSkeletonCU(const MCSection *Section) {
+  Asm->OutStreamer.SwitchSection(Section);
+  DIE *Die = SkeletonCU->getCUDie();
+
+  // Emit the compile units header.
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("skel_info_begin",
+                                                SkeletonCU->getUniqueID()));
+
+  // Emit size of content not including length itself
+  unsigned ContentSize = Die->getSize() +
+    sizeof(int16_t) + // DWARF version number
+    sizeof(int32_t) + // Offset Into Abbrev. Section
+    sizeof(int8_t);   // Pointer Size (in bytes)
+
+  Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
+  Asm->EmitInt32(ContentSize);
+  Asm->OutStreamer.AddComment("DWARF version number");
+  Asm->EmitInt16(dwarf::DWARF_VERSION);
+  Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+  Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
+                         DwarfAbbrevSectionSym);
+  Asm->OutStreamer.AddComment("Address Size (in bytes)");
+  Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+
+  emitDIE(Die);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("skel_info_end",
+                                                SkeletonCU->getUniqueID()));
+
+
+}
+
+// Emit the .debug_info.dwo section for fission. This contains the compile
+// units that would normally be in debug_info.
+void DwarfDebug::emitDebugInfoDWO() {
+  assert(useSplitDwarf() && "No split dwarf debug info?");
+  emitCompileUnits(Asm->getObjFileLowering().getDwarfInfoDWOSection());
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 3394483010..9ddefee8e7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -15,15 +15,15 @@
 #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
 
 #include "DIE.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/LexicalScopes.h"
-#include "llvm/MC/MachineLocation.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/LexicalScopes.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/DebugLoc.h"
 
@@ -43,8 +43,7 @@ class DIEBlock;
 class DIEEntry;
 
 //===----------------------------------------------------------------------===//
-/// SrcLineInfo - This class is used to record source line correspondence.
-///
+/// \brief This class is used to record source line correspondence.
 class SrcLineInfo {
   unsigned Line;                     // Source line number.
   unsigned Column;                   // Source column.
@@ -61,8 +60,8 @@ public:
   MCSymbol *getLabel() const { return Label; }
 };
 
-/// DotDebugLocEntry - This struct describes location entries emitted in
-/// .debug_loc section.
+/// \brief This struct describes location entries emitted in the .debug_loc
+/// section.
 typedef struct DotDebugLocEntry {
   const MCSymbol *Begin;
   const MCSymbol *End;
@@ -101,7 +100,7 @@ typedef struct DotDebugLocEntry {
     : Begin(B), End(E), Variable(0), Merged(false),
       Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
 
-  /// Empty entries are also used as a trigger to emit temp label. Such
+  /// \brief Empty entries are also used as a trigger to emit temp label. Such
   /// labels are referenced is used to find debug_loc offset for a given DIE.
   bool isEmpty() { return Begin == 0 && End == 0; }
   bool isMerged() { return Merged; }
@@ -121,8 +120,7 @@ typedef struct DotDebugLocEntry {
 } DotDebugLocEntry;
 
 //===----------------------------------------------------------------------===//
-/// DbgVariable - This class is used to track local variable information.
-///
+/// \brief This class is used to track local variable information.
 class DbgVariable {
   DIVariable Var;                    // Variable Descriptor.
   DIE *TheDIE;                       // Variable DIE.
@@ -155,7 +153,7 @@ public:
 
     return dwarf::DW_TAG_variable;
   }
-  /// isArtificial - Return true if DbgVariable is artificial.
+  /// \brief Return true if DbgVariable is artificial.
   bool isArtificial()                const {
     if (Var.isArtificial())
       return true;
@@ -190,14 +188,48 @@ public:
   DIType getType() const;
 };
 
+/// \brief Collects and handles information specific to a particular
+/// collection of units.
+class DwarfUnits {
+  // Target of Dwarf emission, used for sizing of abbreviations.
+  AsmPrinter *Asm;
+
+  // Used to uniquely define abbreviations.
+  FoldingSet<DIEAbbrev> *AbbreviationsSet;
+
+  // A list of all the unique abbreviations in use.
+  std::vector<DIEAbbrev *> *Abbreviations;
+
+  // A pointer to all units in the section.
+  SmallVector<CompileUnit *, 1> CUs;
+
+public:
+  DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS,
+             std::vector<DIEAbbrev *> *A) :
+    Asm(AP), AbbreviationsSet(AS), Abbreviations(A) {}
+
+  /// \brief Compute the size and offset of a DIE given an incoming Offset.
+  unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
+
+  /// \brief Compute the size and offset of all the DIEs.
+  void computeSizeAndOffsets();
+
+  /// \brief Define a unique number for the abbreviation.
+  void assignAbbrevNumber(DIEAbbrev &Abbrev);
+
+  /// \brief Add a unit to the list of CUs.
+  void addUnit(CompileUnit *CU) { CUs.push_back(CU); }
+};
+
+/// \brief Collects and handles dwarf debug information.
 class DwarfDebug {
-  /// Asm - Target of Dwarf emission.
+  // Target of Dwarf emission.
   AsmPrinter *Asm;
 
-  /// MMI - Collected machine module information.
+  // Collected machine module information.
   MachineModuleInfo *MMI;
 
-  /// DIEValueAllocator - All DIEValues are allocated through this allocator.
+  // All DIEValues are allocated through this allocator.
   BumpPtrAllocator DIEValueAllocator;
 
   //===--------------------------------------------------------------------===//
@@ -206,92 +238,86 @@ class DwarfDebug {
 
   CompileUnit *FirstCU;
 
-  /// Maps MDNode with its corresponding CompileUnit.
+  // Maps MDNode with its corresponding CompileUnit.
   DenseMap <const MDNode *, CompileUnit *> CUMap;
 
-  /// Maps subprogram MDNode with its corresponding CompileUnit.
+  // Maps subprogram MDNode with its corresponding CompileUnit.
   DenseMap <const MDNode *, CompileUnit *> SPMap;
 
-  /// AbbreviationsSet - Used to uniquely define abbreviations.
-  ///
+  // Used to uniquely define abbreviations.
   FoldingSet<DIEAbbrev> AbbreviationsSet;
 
-  /// Abbreviations - A list of all the unique abbreviations in use.
-  ///
+  // A list of all the unique abbreviations in use.
   std::vector<DIEAbbrev *> Abbreviations;
 
-  /// SourceIdMap - Source id map, i.e. pair of source filename and directory,
-  /// separated by a zero byte, mapped to a unique id.
+  // Source id map, i.e. pair of source filename and directory,
+  // separated by a zero byte, mapped to a unique id.
   StringMap<unsigned, BumpPtrAllocator&> SourceIdMap;
 
-  /// StringPool - A String->Symbol mapping of strings used by indirect
-  /// references.
+  // A String->Symbol mapping of strings used by indirect
+  // references.
   StringMap<std::pair<MCSymbol*, unsigned>, BumpPtrAllocator&> StringPool;
   unsigned NextStringPoolNumber;
 
-  /// SectionMap - Provides a unique id per text section.
-  ///
+  // Provides a unique id per text section.
   SetVector<const MCSection*> SectionMap;
 
-  /// CurrentFnArguments - List of Arguments (DbgValues) for current function.
+  // List of Arguments (DbgValues) for current function.
   SmallVector<DbgVariable *, 8> CurrentFnArguments;
 
   LexicalScopes LScopes;
 
-  /// AbstractSPDies - Collection of abstract subprogram DIEs.
+  // Collection of abstract subprogram DIEs.
   DenseMap<const MDNode *, DIE *> AbstractSPDies;
 
-  /// ScopeVariables - Collection of dbg variables of a scope.
+  // Collection of dbg variables of a scope.
   DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> > ScopeVariables;
 
-  /// AbstractVariables - Collection of abstract variables.
+  // Collection of abstract variables.
   DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
 
-  /// DotDebugLocEntries - Collection of DotDebugLocEntry.
+  // Collection of DotDebugLocEntry.
   SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
 
-  /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
-  /// (at the end of the module) as DW_AT_inline.
+  // Collection of subprogram DIEs that are marked (at the end of the module)
+  // as DW_AT_inline.
   SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
 
-  /// InlineInfo - Keep track of inlined functions and their location.  This
-  /// information is used to populate the debug_inlined section.
+  // Keep track of inlined functions and their location.  This
+  // information is used to populate the debug_inlined section.
   typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
   DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
   SmallVector<const MDNode *, 4> InlinedSPNodes;
 
-  // ProcessedSPNodes - This is a collection of subprogram MDNodes that
-  // are processed to create DIEs.
+  // This is a collection of subprogram MDNodes that are processed to create DIEs.
   SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
 
-  /// LabelsBeforeInsn - Maps instruction with label emitted before
-  /// instruction.
+  // Maps instruction with label emitted before instruction.
   DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
 
-  /// LabelsAfterInsn - Maps instruction with label emitted after
-  /// instruction.
+  // Maps instruction with label emitted after instruction.
   DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
 
-  /// UserVariables - Every user variable mentioned by a DBG_VALUE instruction
-  /// in order of appearance.
+  // Every user variable mentioned by a DBG_VALUE instruction in order of
+  // appearance.
   SmallVector<const MDNode*, 8> UserVariables;
 
-  /// DbgValues - For each user variable, keep a list of DBG_VALUE
-  /// instructions in order. The list can also contain normal instructions that
-  /// clobber the previous DBG_VALUE.
+  // For each user variable, keep a list of DBG_VALUE instructions in order.
+  // The list can also contain normal instructions that clobber the previous
+  // DBG_VALUE.
   typedef DenseMap<const MDNode*, SmallVector<const MachineInstr*, 4> >
     DbgValueHistoryMap;
   DbgValueHistoryMap DbgValues;
 
   SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
 
-  /// Previous instruction's location information. This is used to determine
-  /// label location to indicate scope boundries in dwarf debug info.
+  // Previous instruction's location information. This is used to determine
+  // label location to indicate scope boundries in dwarf debug info.
   DebugLoc PrevInstLoc;
   MCSymbol *PrevLabel;
 
-  /// PrologEndLoc - This location indicates end of function prologue and
-  /// beginning of function body.
+  // This location indicates end of function prologue and beginning of function
+  // body.
   DebugLoc PrologEndLoc;
 
   struct FunctionDebugFrameInfo {
@@ -316,190 +342,185 @@ class DwarfDebug {
   // table for the same directory as DW_at_comp_dir.
   StringRef CompilationDir;
 
+  // Counter for assigning globally unique IDs for CUs.
+  unsigned GlobalCUIndexCount;
+
+  // Holder for the file specific debug information.
+  DwarfUnits InfoHolder;
+
   // Holders for the various debug information flags that we might need to
   // have exposed. See accessor functions below for description.
+
+  // Whether or not we're emitting info for older versions of gdb on darwin.
   bool IsDarwinGDBCompat;
+
+  // DWARF5 Experimental Options
   bool HasDwarfAccelTables;
-  bool HasDwarfFission;
-private:
+  bool HasSplitDwarf;
 
-  /// assignAbbrevNumber - Define a unique number for the abbreviation.
-  ///
-  void assignAbbrevNumber(DIEAbbrev &Abbrev);
+  // Fission Variables
+  // In general these will all be for bits that are left in the
+  // original object file, rather than things that are meant
+  // to be in the .dwo sections.
+
+  // The CU left in the original object file for Fission debug info.
+  CompileUnit *SkeletonCU;
+  DwarfUnits SkeletonHolder;
+
+private:
 
   void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
 
-  /// findAbstractVariable - Find abstract variable associated with Var.
+  /// \brief Find abstract variable associated with Var.
   DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
 
-  /// updateSubprogramScopeDIE - Find DIE for the given subprogram and
-  /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
-  /// If there are global variables in this scope then create and insert
-  /// DIEs for these variables.
+  /// \brief Find DIE for the given subprogram and attach appropriate
+  /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
+  /// variables in this scope then create and insert DIEs for these
+  /// variables.
   DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode);
 
-  /// constructLexicalScope - Construct new DW_TAG_lexical_block
-  /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+  /// \brief Construct new DW_TAG_lexical_block for this scope and
+  /// attach DW_AT_low_pc/DW_AT_high_pc labels.
   DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
 
-  /// constructInlinedScopeDIE - This scope represents inlined body of
-  /// a function. Construct DIE to represent this concrete inlined copy
-  /// of the function.
+  /// \brief This scope represents inlined body of a function. Construct
+  /// DIE to represent this concrete inlined copy of the function.
   DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
 
-  /// constructScopeDIE - Construct a DIE for this scope.
+  /// \brief Construct a DIE for this scope.
   DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope);
 
-  /// EmitSectionLabels - Emit initial Dwarf sections with a label at
-  /// the start of each one.
+  /// \brief Emit initial Dwarf sections with a label at the start of each one.
   void emitSectionLabels();
 
-  /// emitDIE - Recursively Emits a debug information entry.
-  ///
+  /// \brief Recursively Emits a debug information entry.
   void emitDIE(DIE *Die);
 
-  /// computeSizeAndOffset - Compute the size and offset of a DIE given
-  /// an incoming Offset.
-  ///
+  /// \brief Compute the size and offset of a DIE given an incoming Offset.
   unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
 
-  /// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
-  ///
+  /// \brief Compute the size and offset of all the DIEs.
   void computeSizeAndOffsets();
 
-  /// computeInlinedDIEs - Attach DW_AT_inline attribute with inlined
-  /// subprogram DIEs.
+  /// \brief Attach DW_AT_inline attribute with inlined subprogram DIEs.
   void computeInlinedDIEs();
 
-  /// collectDeadVariables - Collect info for variables that were optimized out.
+  /// \brief Collect info for variables that were optimized out.
   void collectDeadVariables();
 
-  /// finalizeModuleInfo - Finish off debug information after all functions
-  /// have been processed.
+  /// \brief Finish off debug information after all functions have been
+  /// processed.
   void finalizeModuleInfo();
 
-  /// endSections - Emit labels to close any remaining sections that have
-  /// been left open.
+  /// \brief Emit labels to close any remaining sections that have been left
+  /// open.
   void endSections();
 
-  /// EmitDebugInfo - Emit the debug info section.
-  ///
+  /// \brief Emit all of the compile units to the target section.
+  void emitCompileUnits(const MCSection *);
+
+  /// \brief Emit the debug info section.
   void emitDebugInfo();
 
-  /// emitAbbreviations - Emit the abbreviation section.
-  ///
+  /// \brief Emit the abbreviation section.
   void emitAbbreviations();
 
-  /// emitEndOfLineMatrix - Emit the last address of the section and the end of
+  /// \brief Emit the last address of the section and the end of
   /// the line matrix.
-  ///
   void emitEndOfLineMatrix(unsigned SectionEnd);
 
-  /// emitAccelNames - Emit visible names into a hashed accelerator table
-  /// section.
+  /// \brief Emit visible names into a hashed accelerator table section.
   void emitAccelNames();
 
-  /// emitAccelObjC - Emit objective C classes and categories into a hashed
+  /// \brief Emit objective C classes and categories into a hashed
   /// accelerator table section.
   void emitAccelObjC();
 
-  /// emitAccelNamespace - Emit namespace dies into a hashed accelerator
-  /// table.
+  /// \brief Emit namespace dies into a hashed accelerator table.
   void emitAccelNamespaces();
 
-  /// emitAccelTypes() - Emit type dies into a hashed accelerator table.
-  ///
+  /// \brief Emit type dies into a hashed accelerator table.
   void emitAccelTypes();
 
-  /// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
-  ///
+  /// \brief Emit visible types into a debug pubtypes section.
   void emitDebugPubTypes();
 
-  /// emitDebugStr - Emit visible names into a debug str section.
-  ///
+  /// \brief Emit visible names into a debug str section.
   void emitDebugStr();
 
-  /// emitDebugLoc - Emit visible names into a debug loc section.
-  ///
+  /// \brief Emit visible names into a debug loc section.
   void emitDebugLoc();
 
-  /// EmitDebugARanges - Emit visible names into a debug aranges section.
-  ///
+  /// \brief Emit visible names into a debug aranges section.
   void emitDebugARanges();
 
-  /// emitDebugRanges - Emit visible names into a debug ranges section.
-  ///
+  /// \brief Emit visible names into a debug ranges section.
   void emitDebugRanges();
 
-  /// emitDebugMacInfo - Emit visible names into a debug macinfo section.
-  ///
+  /// \brief Emit visible names into a debug macinfo section.
   void emitDebugMacInfo();
 
-  /// emitDebugInlineInfo - Emit inline info using following format.
-  /// Section Header:
-  /// 1. length of section
-  /// 2. Dwarf version number
-  /// 3. address size.
-  ///
-  /// Entries (one "entry" for each function that was inlined):
-  ///
-  /// 1. offset into __debug_str section for MIPS linkage name, if exists;
-  ///   otherwise offset into __debug_str for regular function name.
-  /// 2. offset into __debug_str section for regular function name.
-  /// 3. an unsigned LEB128 number indicating the number of distinct inlining
-  /// instances for the function.
-  ///
-  /// The rest of the entry consists of a {die_offset, low_pc} pair for each
-  /// inlined instance; the die_offset points to the inlined_subroutine die in
-  /// the __debug_info section, and the low_pc is the starting address for the
-  /// inlining instance.
+  /// \brief Emit inline info using custom format.
   void emitDebugInlineInfo();
 
-  /// constructCompileUnit - Create new CompileUnit for the given
-  /// metadata node with tag DW_TAG_compile_unit.
+  /// DWARF 5 Experimental Split Dwarf Emitters
+
+  /// \brief Construct the split debug info compile unit for the debug info
+  /// section.
+  CompileUnit *constructSkeletonCU(const MDNode *);
+
+  /// \brief Emit the local split debug info section.
+  void emitSkeletonCU(const MCSection *);
+
+  /// \brief Emit the debug info dwo section.
+  void emitDebugInfoDWO();
+
+  /// \brief Create new CompileUnit for the given metadata node with tag
+  /// DW_TAG_compile_unit.
   CompileUnit *constructCompileUnit(const MDNode *N);
 
-  /// construct SubprogramDIE - Construct subprogram DIE.
+  /// \brief Construct subprogram DIE.
   void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N);
 
-  /// recordSourceLine - Register a source line with debug info. Returns the
-  /// unique label that was emitted and which provides correspondence to
-  /// the source line list.
+  /// \brief Register a source line with debug info. Returns the unique
+  /// label that was emitted and which provides correspondence to the
+  /// source line list.
   void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
                         unsigned Flags);
 
-  /// identifyScopeMarkers() - Indentify instructions that are marking the
-  /// beginning of or ending of a scope.
+  /// \brief Indentify instructions that are marking the beginning of or
+  /// ending of a scope.
   void identifyScopeMarkers();
 
-  /// addCurrentFnArgument - If Var is an current function argument that add
-  /// it in CurrentFnArguments list.
+  /// \brief If Var is an current function argument that add it in
+  /// CurrentFnArguments list.
   bool addCurrentFnArgument(const MachineFunction *MF,
                             DbgVariable *Var, LexicalScope *Scope);
 
-  /// collectVariableInfo - Populate LexicalScope entries with variables' info.
+  /// \brief Populate LexicalScope entries with variables' info.
   void collectVariableInfo(const MachineFunction *,
                            SmallPtrSet<const MDNode *, 16> &ProcessedVars);
 
-  /// collectVariableInfoFromMMITable - Collect variable information from
-  /// side table maintained by MMI.
+  /// \brief Collect variable information from the side table maintained
+  /// by MMI.
   void collectVariableInfoFromMMITable(const MachineFunction * MF,
                                        SmallPtrSet<const MDNode *, 16> &P);
 
-  /// requestLabelBeforeInsn - Ensure that a label will be emitted before MI.
+  /// \brief Ensure that a label will be emitted before MI.
   void requestLabelBeforeInsn(const MachineInstr *MI) {
     LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0));
   }
 
-  /// getLabelBeforeInsn - Return Label preceding the instruction.
+  /// \brief Return Label preceding the instruction.
   const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
 
-  /// requestLabelAfterInsn - Ensure that a label will be emitted after MI.
+  /// \brief Ensure that a label will be emitted after MI.
   void requestLabelAfterInsn(const MachineInstr *MI) {
     LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0));
   }
 
-  /// getLabelAfterInsn - Return Label immediately following the instruction.
+  /// \brief Return Label immediately following the instruction.
   const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
 
 public:
@@ -509,70 +530,58 @@ public:
   DwarfDebug(AsmPrinter *A, Module *M);
   ~DwarfDebug();
 
-  /// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such
-  /// as llvm.dbg.enum and llvm.dbg.ty
+  /// \brief Collect debug info from named mdnodes such as llvm.dbg.enum
+  /// and llvm.dbg.ty
   void collectInfoFromNamedMDNodes(const Module *M);
 
-  /// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder.
+  /// \brief Collect debug info using DebugInfoFinder.
   /// FIXME - Remove this when DragonEgg switches to DIBuilder.
   bool collectLegacyDebugInfo(const Module *M);
 
-  /// beginModule - Emit all Dwarf sections that should come prior to the
+  /// \brief Emit all Dwarf sections that should come prior to the
   /// content.
   void beginModule();
 
-  /// endModule - Emit all Dwarf sections that should come after the content.
-  ///
+  /// \brief Emit all Dwarf sections that should come after the content.
   void endModule();
 
-  /// beginFunction - Gather pre-function debug information.  Assumes being
-  /// emitted immediately after the function entry point.
+  /// \brief Gather pre-function debug information.
   void beginFunction(const MachineFunction *MF);
 
-  /// endFunction - Gather and emit post-function debug information.
-  ///
+  /// \brief Gather and emit post-function debug information.
   void endFunction(const MachineFunction *MF);
 
-  /// beginInstruction - Process beginning of an instruction.
+  /// \brief Process beginning of an instruction.
   void beginInstruction(const MachineInstr *MI);
 
-  /// endInstruction - Prcess end of an instruction.
+  /// \brief Process end of an instruction.
   void endInstruction(const MachineInstr *MI);
 
-  /// getOrCreateSourceID - Look up the source id with the given directory and
-  /// source file names. If none currently exists, create a new id and insert it
-  /// in the SourceIds map.
-  unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName,
-                               StringRef Extra = ""); // @LOCALMOD for Extra
-
-  // @LOCALMOD-BEGIN - Create an ID for CompileUnits, taking extra care
-  // in the case that we have multiple compile units coming from the
-  // same source file and directory.
-  unsigned getOrCreateCompileUnitID(StringRef FileName, StringRef DirName,
-                                    const MDNode *N);
-  // @LOCALMOD-END
-
+  /// \brief Look up the source id with the given directory and source file
+  /// names. If none currently exists, create a new id and insert it in the
+  /// SourceIds map.
+  unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName);
 
-  /// getStringPool - returns the entry into the start of the pool.
+  /// \brief Returns the entry into the start of the pool.
   MCSymbol *getStringPool();
 
-  /// getStringPoolEntry - returns an entry into the string pool with the given
+  /// \brief Returns an entry into the string pool with the given
   /// string text.
   MCSymbol *getStringPoolEntry(StringRef Str);
 
-  /// useDarwinGDBCompat - returns whether or not to limit some of our debug
+  /// \brief Returns whether or not to limit some of our debug
   /// output to the limitations of darwin gdb.
   bool useDarwinGDBCompat() { return IsDarwinGDBCompat; }
 
   // Experimental DWARF5 features.
 
-  /// useDwarfAccelTables - returns whether or not to emit tables that
-  /// dwarf consumers can use to accelerate lookup.
+  /// \brief Returns whether or not to emit tables that dwarf consumers can
+  /// use to accelerate lookup.
   bool useDwarfAccelTables() { return HasDwarfAccelTables; }
 
-  /// useDwarfFission - returns whether or not to change the current debug
-  /// info for the fission proposal support.
-  bool useDwarfFission() { return HasDwarfFission; }
+  /// \brief Returns whether or not to change the current debug info for the
+  /// split dwarf proposal support.
+  bool useSplitDwarf() { return HasSplitDwarf; }
 };
 } // End of namespace llvm
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 0bcb1b5cc8..6043318ba2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -12,30 +12,30 @@
 //===----------------------------------------------------------------------===//
 
 #include "DwarfException.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
 using namespace llvm;
 
 DwarfException::DwarfException(AsmPrinter *A)
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index f7c011968c..b902b72a16 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -12,20 +12,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/GCMetadataPrinter.h"
-#include "llvm/Module.h"
+#include "llvm/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
 #include <cctype>
 using namespace llvm;
 
diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
index 70742a8d2e..f6338e5a4a 100644
--- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -12,30 +12,30 @@
 //===----------------------------------------------------------------------===//
 
 #include "DwarfException.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Dwarf.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
 using namespace llvm;
 
 Win64Exception::Win64Exception(AsmPrinter *A)
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 90f6eec831..071a45dbec 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -18,25 +18,24 @@
 
 #define DEBUG_TYPE "branchfolding"
 #include "BranchFolding.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineConstantPool.h" //  @LOCALMOD
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Function.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index fa6d4e16cf..1e08672183 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -54,7 +54,6 @@ add_llvm_library(LLVMCodeGen
   MachineInstrBundle.cpp
   MachineLICM.cpp
   MachineLoopInfo.cpp
-  MachineLoopRanges.cpp
   MachineModuleInfo.cpp
   MachineModuleInfoImpls.cpp
   MachinePassRegistry.cpp
@@ -100,9 +99,10 @@ add_llvm_library(LLVMCodeGen
   StrongPHIElimination.cpp
   TailDuplication.cpp
   TargetFrameLoweringImpl.cpp
-  TargetInstrInfoImpl.cpp
+  TargetInstrInfo.cpp
   TargetLoweringObjectFileImpl.cpp
   TargetOptionsImpl.cpp
+  TargetRegisterInfo.cpp
   TargetSchedule.cpp
   TwoAddressInstructionPass.cpp
   UnreachableBlockElim.cpp
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index b1460ed107..f890994453 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -14,13 +14,13 @@
 
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index d8e06c33a6..4486cc10b9 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -13,15 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "code-placement"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumLoopsAligned,  "Number of loops aligned");
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 377b4712be..48105d9aa8 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -17,12 +17,12 @@
 #include "CriticalAntiDepBreaker.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
@@ -378,7 +378,7 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
                                                  unsigned LastNewReg,
                                                  const TargetRegisterClass *RC)
 {
-  ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
+  ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC);
   for (unsigned i = 0; i != Order.size(); ++i) {
     unsigned NewReg = Order[i];
     // Don't replace a register with itself.
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index ad95c48191..8fb2b0eced 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -17,13 +17,13 @@
 #define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
 
 #include "AntiDepBreaker.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/ADT/BitVector.h"
 #include <map>
 
 namespace llvm {
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index ff2f11353a..840a10128d 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -23,12 +23,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/DFAPacketizer.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetInstrInfo.h"
 using namespace llvm;
 
 DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2],
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 8964269dde..a526d24888 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -13,14 +13,14 @@
 
 #define DEBUG_TYPE "codegen-dce"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumDeletes,          "Number of dead instructions deleted");
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 709562438c..76d084877d 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -13,15 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "dwarfehprepare"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index d5d84041b6..f332925b0e 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -17,6 +17,7 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "early-ifcvt"
+#include "llvm/CodeGen/Passes.h"
 #include "MachineTraceMetrics.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
@@ -30,13 +31,12 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index ed78f19421..9b0e76fa20 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -21,15 +21,15 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "execution-fix"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 /// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index 2c4a93543c..b2b68828a2 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -15,12 +15,12 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "expand-isel-pseudos"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Debug.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index ffe4b63c1b..0b9e83d6cd 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -18,11 +18,11 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 1caf8c2339..923992db8e 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -14,10 +14,10 @@
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Pass.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Function.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index f4755bb163..2a37b9935a 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -16,22 +16,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 31e36f0168..0a155e48a2 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -12,24 +12,24 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "ifcvt"
-#include "BranchFolding.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "BranchFolding.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 // Hidden options for help debugging.
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 37828a70b5..c6d1a18dbd 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -14,7 +14,6 @@
 
 #define DEBUG_TYPE "regalloc"
 #include "Spiller.h"
-#include "VirtRegMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
@@ -22,16 +21,17 @@
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 1541bf0c85..a8e711e33b 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -13,9 +13,9 @@
 
 #define DEBUG_TYPE "regalloc"
 #include "InterferenceCache.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 3c928a5086..c02fb9a1ee 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -15,7 +15,7 @@
 #ifndef LLVM_CODEGEN_INTERFERENCECACHE
 #define LLVM_CODEGEN_INTERFERENCECACHE
 
-#include "LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
 
 namespace llvm {
 
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index b7c9f17df9..3f986b83b1 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -12,16 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 template <class ArgIt>
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 91ec038725..1065614f4b 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -11,30 +11,30 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/PassManager.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/OwningPtr.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
 using namespace llvm;
 
 // Enable or disable FastISel. Both options are needed, because
@@ -96,6 +96,8 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
 
   PassConfig->addIRPasses();
 
+  PassConfig->addCodeGenPrepare();
+
   PassConfig->addPassesToHandleExceptions();
 
   PassConfig->addISelPrepare();
@@ -200,7 +202,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                                          *Context, *MAB, Out,
                                                          MCE, hasMCRelaxAll(),
                                                          hasMCNoExecStack()));
-    AsmStreamer.get()->InitSections();
+    AsmStreamer.get()->setAutoInitSections(true);
     break;
   }
   case CGFT_Null:
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 6b6b9d084e..fde2de0586 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -16,10 +16,10 @@
 
 #define DEBUG_TYPE "lexicalscopes"
 #include "llvm/CodeGen/LexicalScopes.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index defc1279ec..cc0cc108d6 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -21,11 +21,6 @@
 
 #define DEBUG_TYPE "livedebug"
 #include "LiveDebugVariables.h"
-#include "VirtRegMap.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Metadata.h"
-#include "llvm/Value.h"
 #include "llvm/ADT/IntervalMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LexicalScopes.h"
@@ -35,11 +30,16 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Metadata.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Value.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 8585cbb30d..68f4b16023 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -19,15 +19,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "RegisterCoalescer.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "RegisterCoalescer.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 4e75d892e5..c414ded1a4 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -17,27 +17,27 @@
 
 #define DEBUG_TYPE "regalloc"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/Value.h"
+#include "LiveRangeCalc.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "LiveRangeCalc.h"
-#include "VirtRegMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Value.h"
 #include <algorithm>
-#include <limits>
 #include <cmath>
+#include <limits>
 using namespace llvm;
 
 // Switch to the new experimental algorithm for computing live intervals.
@@ -1292,7 +1292,11 @@ private:
       MachineBasicBlock::iterator MII(MI);
       ++MII;
       MachineBasicBlock* MBB = MI->getParent();
-      for (; MII != MBB->end() && LIS.getInstructionIndex(MII) < OldIdx; ++MII){
+      for (; MII != MBB->end(); ++MII){
+        if (MII->isDebugValue())
+          continue;
+        if (LIS.getInstructionIndex(MII) < OldIdx)
+          break;
         for (MachineInstr::mop_iterator MOI = MII->operands_begin(),
                                         MOE = MII->operands_end();
              MOI != MOE; ++MOI) {
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index dadd02bfc6..d5a81a311c 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -14,13 +14,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
-#include "LiveIntervalUnion.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
 #include "llvm/ADT/SparseBitVector.h"
-#include "llvm/CodeGen/MachineLoopRanges.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-
 #include <algorithm>
 
 using namespace llvm;
@@ -182,33 +180,6 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
   return InterferingVRegs.size();
 }
 
-bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) {
-  // VirtReg is likely live throughout the loop, so start by checking LIU-Loop
-  // overlaps.
-  IntervalMapOverlaps<LiveIntervalUnion::Map, MachineLoopRange::Map>
-    Overlaps(LiveUnion->getMap(), Loop->getMap());
-  if (!Overlaps.valid())
-    return false;
-
-  // The loop is overlapping an LIU assignment. Check VirtReg as well.
-  LiveInterval::iterator VRI = VirtReg->find(Overlaps.start());
-
-  for (;;) {
-    if (VRI == VirtReg->end())
-      return false;
-    if (VRI->start < Overlaps.stop())
-      return true;
-
-    Overlaps.advanceTo(VRI->start);
-    if (!Overlaps.valid())
-      return false;
-    if (Overlaps.start() < VRI->end)
-      return true;
-
-    VRI = VirtReg->advanceTo(VRI, Overlaps.start());
-  }
-}
-
 void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,
                                     unsigned NSize) {
   // Reuse existing allocation.
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index f8fbc7ddf0..3b28e6afb6 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -12,16 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
-#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index 7f22478d01..0ef069f478 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -12,16 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
-#include "LiveRegMatrix.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "RegisterCoalescer.h"
-#include "VirtRegMap.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index f0b522bd7d..be11a8fa86 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -15,12 +15,12 @@
 
 #define DEBUG_TYPE "livestacks"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <limits>
 using namespace llvm;
 
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 6ea933d430..f81ad1cc6b 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -27,17 +27,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/STLExtras.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index fbc9e20517..ebbd47df62 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -15,6 +15,14 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "localstackalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
@@ -22,19 +30,11 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 667c7613d7..e30effece3 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -12,24 +12,24 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index a079d6e591..070daf2e2b 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/InitializePasses.h"
-#include "llvm/Analysis/BlockFrequencyImpl.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/InitializePasses.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index cd3f19944e..6b97b5f7d7 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -26,6 +26,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "block-placement2"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -33,13 +38,8 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include <algorithm>
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 447921147f..df5d67226f 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Instructions.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Instructions.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index dbc41defeb..61d8d384cd 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -15,17 +15,17 @@
 
 #define DEBUG_TYPE "machine-cse"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/ScopedHashTable.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Target/TargetInstrInfo.h"
 using namespace llvm;
 
 STATISTIC(NumCoalesces, "Number of copies coalesced");
@@ -126,8 +126,6 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
       // deleted.
       continue;
     MachineInstr *DefMI = MRI->getVRegDef(Reg);
-    if (DefMI->getParent() != MBB)
-      continue;
     if (!DefMI->isCopy())
       continue;
     unsigned SrcReg = DefMI->getOperand(1).getReg();
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 4a793281b2..dc8a2241c7 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -13,19 +13,19 @@
 
 #define DEBUG_TYPE "codegen-cp"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 STATISTIC(NumDeletes, "Number of dead copies deleted");
@@ -33,6 +33,7 @@ STATISTIC(NumDeletes, "Number of dead copies deleted");
 namespace {
   class MachineCopyPropagation : public MachineFunctionPass {
     const TargetRegisterInfo *TRI;
+    const TargetInstrInfo *TII;
     MachineRegisterInfo *MRI;
 
   public:
@@ -51,6 +52,7 @@ namespace {
                                  SourceMap &SrcMap,
                                  DenseMap<unsigned, MachineInstr*> &AvailCopyMap);
     bool CopyPropagateBlock(MachineBasicBlock &MBB);
+    void removeCopy(MachineInstr *MI);
   };
 }
 char MachineCopyPropagation::ID = 0;
@@ -124,6 +126,16 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src,
   return false;
 }
 
+// Remove MI from the function because it has been determined it is dead.
+// Turn it into a noop KILL instruction if it has super-register liveness
+// adjustments.
+void MachineCopyPropagation::removeCopy(MachineInstr *MI) {
+  if (MI->getNumOperands() == 2)
+    MI->eraseFromParent();
+  else
+    MI->setDesc(TII->get(TargetOpcode::KILL));
+}
+
 bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
   SmallSetVector<MachineInstr*, 8> MaybeDeadCopies;  // Candidates for deletion
   DenseMap<unsigned, MachineInstr*> AvailCopyMap;    // Def -> available copies map
@@ -169,7 +181,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
           for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I)
             I->clearRegisterKills(Def, TRI);
 
-          MI->eraseFromParent();
+          removeCopy(MI);
           Changed = true;
           ++NumDeletes;
           continue;
@@ -262,7 +274,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
         unsigned Reg = (*DI)->getOperand(0).getReg();
         if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg))
           continue;
-        (*DI)->eraseFromParent();
+        removeCopy(*DI);
         Changed = true;
         ++NumDeletes;
       }
@@ -298,7 +310,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
            DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end();
          DI != DE; ++DI) {
       if (!MRI->isReserved((*DI)->getOperand(0).getReg())) {
-        (*DI)->eraseFromParent();
+        removeCopy(*DI);
         Changed = true;
         ++NumDeletes;
       }
@@ -312,6 +324,7 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
 
   TRI = MF.getTarget().getRegisterInfo();
+  TII = MF.getTarget().getInstrInfo();
   MRI = &MF.getRegInfo();
 
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 91d5211857..e88c3c16fd 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -14,28 +14,28 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -58,7 +58,8 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
   else
     RegInfo = 0;
   MFInfo = 0;
-  FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
+  FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering(),
+                                               TM.Options.RealignStack);
   if (Fn->getFnAttributes().hasAttribute(Attributes::StackAlignment))
     FrameInfo->ensureMaxAlignment(Fn->getAttributes().
                                   getFnAttributes().getStackAlignment());
@@ -445,6 +446,70 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const {
 //  MachineFrameInfo implementation
 //===----------------------------------------------------------------------===//
 
+/// ensureMaxAlignment - Make sure the function is at least Align bytes
+/// aligned.
+void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
+  if (!TFI.isStackRealignable() || !RealignOption)
+    assert(Align <= TFI.getStackAlignment() &&
+           "For targets without stack realignment, Align is out of limit!");
+  if (MaxAlignment < Align) MaxAlignment = Align;
+}
+
+/// clampStackAlignment - Clamp the alignment if requested and emit a warning.
+static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
+                                           unsigned StackAlign) {
+  if (!ShouldClamp || Align <= StackAlign)
+    return Align;
+  DEBUG(dbgs() << "Warning: requested alignment " << Align
+               << " exceeds the stack alignment " << StackAlign
+               << " when stack realignment is off" << '\n');
+  return StackAlign;
+}
+
+/// CreateStackObject - Create a new statically sized stack object, returning
+/// a nonnegative identifier to represent it.
+///
+int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
+                      bool isSS, bool MayNeedSP, const AllocaInst *Alloca) {
+  assert(Size != 0 && "Cannot allocate zero size stack objects!");
+  Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+                                  Alignment, TFI.getStackAlignment());
+  Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP,
+                                Alloca));
+  int Index = (int)Objects.size() - NumFixedObjects - 1;
+  assert(Index >= 0 && "Bad frame index!");
+  ensureMaxAlignment(Alignment);
+  return Index;
+}
+
+/// CreateSpillStackObject - Create a new statically sized stack object that
+/// represents a spill slot, returning a nonnegative identifier to represent
+/// it.
+///
+int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
+                                             unsigned Alignment) {
+  Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+                                  Alignment, TFI.getStackAlignment()); 
+  CreateStackObject(Size, Alignment, true, false);
+  int Index = (int)Objects.size() - NumFixedObjects - 1;
+  ensureMaxAlignment(Alignment);
+  return Index;
+}
+
+/// CreateVariableSizedObject - Notify the MachineFrameInfo object that a
+/// variable sized object has been created.  This must be created whenever a
+/// variable sized object is created, whether or not the index returned is
+/// actually used.
+///
+int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) {
+  HasVarSizedObjects = true;
+  Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+                                  Alignment, TFI.getStackAlignment()); 
+  Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0));
+  ensureMaxAlignment(Alignment);
+  return (int)Objects.size()-NumFixedObjects-1;
+}
+
 /// CreateFixedObject - Create a new object at a fixed location on the stack.
 /// All fixed objects should be created before other objects are created for
 /// efficiency. By default, fixed objects are immutable. This returns an
@@ -459,6 +524,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
   // object is 16-byte aligned.
   unsigned StackAlign = TFI.getStackAlignment();
   unsigned Align = MinAlign(SPOffset, StackAlign);
+  Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption,
+                              Align, TFI.getStackAlignment()); 
   Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
                                               /*isSS*/   false,
                                               /*NeedSP*/ false,
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index ed94efb935..fa9c821b2a 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -12,11 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index ce8d52000b..3aebdcdabb 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -12,15 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Value.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -28,19 +22,25 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/Hashing.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Type.h"
+#include "llvm/Value.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -518,16 +518,6 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
 // MachineInstr Implementation
 //===----------------------------------------------------------------------===//
 
-/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
-/// MCID NULL and no operands.
-MachineInstr::MachineInstr()
-  : MCID(0), Flags(0), AsmPrinterFlags(0),
-    NumMemRefs(0), MemRefs(0),
-    Parent(0) {
-  // Make sure that we get added to a machine basicblock
-  LeakDetector::addGarbageObject(this);
-}
-
 void MachineInstr::addImplicitDefUseOperands() {
   if (MCID->ImplicitDefs)
     for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
@@ -554,23 +544,6 @@ MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
   LeakDetector::addGarbageObject(this);
 }
 
-/// MachineInstr ctor - Work exactly the same as the ctor two above, except
-/// that the MachineInstr is created and added to the end of the specified
-/// basic block.
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
-                           const MCInstrDesc &tid)
-  : MCID(&tid), Flags(0), AsmPrinterFlags(0),
-    NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) {
-  assert(MBB && "Cannot use inserting ctor with null basic block!");
-  unsigned NumImplicitOps =
-    MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
-  Operands.reserve(NumImplicitOps + MCID->getNumOperands());
-  addImplicitDefUseOperands();
-  // Make sure that we get added to a machine basicblock
-  LeakDetector::addGarbageObject(this);
-  MBB->push_back(this);  // Add instruction to end of basic block!
-}
-
 /// MachineInstr ctor - Copies MachineInstr arg exactly
 ///
 MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
@@ -921,6 +894,38 @@ unsigned MachineInstr::getNumExplicitOperands() const {
   return NumOperands;
 }
 
+void MachineInstr::bundleWithPred() {
+  assert(!isBundledWithPred() && "MI is already bundled with its predecessor");
+  setFlag(BundledPred);
+  MachineBasicBlock::instr_iterator Pred = this;
+  --Pred;
+  Pred->setFlag(BundledSucc);
+}
+
+void MachineInstr::bundleWithSucc() {
+  assert(!isBundledWithSucc() && "MI is already bundled with its successor");
+  setFlag(BundledSucc);
+  MachineBasicBlock::instr_iterator Succ = this;
+  ++Succ;
+  Succ->setFlag(BundledPred);
+}
+
+void MachineInstr::unbundleFromPred() {
+  assert(isBundledWithPred() && "MI isn't bundled with its predecessor");
+  clearFlag(BundledPred);
+  MachineBasicBlock::instr_iterator Pred = this;
+  --Pred;
+  Pred->clearFlag(BundledSucc);
+}
+
+void MachineInstr::unbundleFromSucc() {
+  assert(isBundledWithSucc() && "MI isn't bundled with its successor");
+  clearFlag(BundledSucc);
+  MachineBasicBlock::instr_iterator Succ = this;
+  --Succ;
+  Succ->clearFlag(BundledPred);
+}
+
 /// isBundled - Return true if this instruction part of a bundle. This is true
 /// if either itself or its following instruction is marked "InsideBundle".
 bool MachineInstr::isBundled() const {
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index 70f97dedaa..dd46ecb17d 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -8,14 +8,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 169443e03d..8c2b4e6aa2 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -22,6 +22,10 @@
 
 #define DEBUG_TYPE "machine-licm"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -29,17 +33,13 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 static cl::opt<bool>
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 27afeec1d9..4e2cfdc4e5 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -15,9 +15,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Analysis/LoopInfoImpl.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Analysis/LoopInfoImpl.h"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
 
diff --git a/lib/CodeGen/MachineLoopRanges.cpp b/lib/CodeGen/MachineLoopRanges.cpp
deleted file mode 100644
index 17fe67f650..0000000000
--- a/lib/CodeGen/MachineLoopRanges.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the implementation of the MachineLoopRanges analysis.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/MachineLoopRanges.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/Passes.h"
-
-using namespace llvm;
-
-char MachineLoopRanges::ID = 0;
-INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges",
-                "Machine Loop Ranges", true, true)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges",
-                "Machine Loop Ranges", true, true)
-
-char &llvm::MachineLoopRangesID = MachineLoopRanges::ID;
-
-void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-  AU.addRequiredTransitive<SlotIndexes>();
-  AU.addRequiredTransitive<MachineLoopInfo>();
-  MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-/// runOnMachineFunction - Don't do much, loop ranges are computed on demand.
-bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) {
-  releaseMemory();
-  Indexes = &getAnalysis<SlotIndexes>();
-  return false;
-}
-
-void MachineLoopRanges::releaseMemory() {
-  DeleteContainerSeconds(Cache);
-  Cache.clear();
-}
-
-MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) {
-  MachineLoopRange *&Range = Cache[Loop];
-  if (!Range)
-    Range = new MachineLoopRange(Loop, Allocator, *Indexes);
-  return Range;
-}
-
-/// Create a MachineLoopRange, only accessible to MachineLoopRanges.
-MachineLoopRange::MachineLoopRange(const MachineLoop *loop,
-                                   MachineLoopRange::Allocator &alloc,
-                                   SlotIndexes &Indexes)
-  : Loop(loop), Intervals(alloc), Area(0) {
-  // Compute loop coverage.
-  for (MachineLoop::block_iterator I = Loop->block_begin(),
-         E = Loop->block_end(); I != E; ++I) {
-    const std::pair<SlotIndex, SlotIndex> &Range = Indexes.getMBBRange(*I);
-    Intervals.insert(Range.first, Range.second, 1u);
-    Area += Range.first.distance(Range.second);
-  }
-}
-
-/// overlaps - Return true if this loop overlaps the given range of machine
-/// instructions.
-bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) {
-  Map::const_iterator I = Intervals.find(Start);
-  return I.valid() && Stop > I.start();
-}
-
-unsigned MachineLoopRange::getNumber() const {
-  return Loop->getHeader()->getNumber();
-}
-
-/// byNumber - Comparator for array_pod_sort that sorts a list of
-/// MachineLoopRange pointers by number.
-int MachineLoopRange::byNumber(const void *pa, const void *pb) {
-  const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
-  const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
-  unsigned na = a->getNumber();
-  unsigned nb = b->getNumber();
-  if (na < nb)
-    return -1;
-  if (na > nb)
-    return 1;
-  return 0;
-}
-
-/// byAreaDesc - Comparator for array_pod_sort that sorts a list of
-/// MachineLoopRange pointers by:
-/// 1. Descending area.
-/// 2. Ascending number.
-int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) {
-  const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
-  const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
-  if (a->getArea() != b->getArea())
-    return a->getArea() > b->getArea() ? -1 : 1;
-  return byNumber(pa, pb);
-}
-
-void MachineLoopRange::print(raw_ostream &OS) const {
-  OS << "Loop#" << getNumber() << " =";
-  for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I)
-    OS << " [" << I.start() << ';' << I.stop() << ')';
-}
-
-raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) {
-  MLR.print(OS);
-  return OS;
-}
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 005bf783e3..ad88c51118 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -8,18 +8,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineModuleInfo.h"
-
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Module.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
@@ -254,15 +253,8 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
 MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
                                      const MCRegisterInfo &MRI,
                                      const MCObjectFileInfo *MOFI)
-  : ImmutablePass(ID), Context(MAI, MRI, MOFI),
-    ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0),
-    CallsUnwindInit(0), DbgInfoAvailable(false),
-    UsesVAFloatArgument(false) {
+  : ImmutablePass(ID), Context(MAI, MRI, MOFI, 0, false) {
   initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
-  // Always emit some info, by default "no personality" info.
-  Personalities.push_back(NULL);
-  AddrLabelSymbols = 0;
-  TheModule = 0;
 }
 
 MachineModuleInfo::MachineModuleInfo()
@@ -275,25 +267,35 @@ MachineModuleInfo::MachineModuleInfo()
 
 MachineModuleInfo::~MachineModuleInfo() {
   delete ObjFileMMI;
+}
 
-  // FIXME: Why isn't doFinalization being called??
-  //assert(AddrLabelSymbols == 0 && "doFinalization not called");
-  delete AddrLabelSymbols;
+bool MachineModuleInfo::doInitialization(Module &M) {
+  
+  Context.doInitialization();
+
+  ObjFileMMI = 0;
+  CompactUnwindEncoding = 0;
+  CurCallSite = 0;
+  CallsEHReturn = 0;
+  CallsUnwindInit = 0;
+  DbgInfoAvailable = UsesVAFloatArgument = false; 
+  // Always emit some info, by default "no personality" info.
+  Personalities.push_back(NULL);
   AddrLabelSymbols = 0;
-}
+  TheModule = 0;
 
-/// doInitialization - Initialize the state for a new module.
-///
-bool MachineModuleInfo::doInitialization() {
-  assert(AddrLabelSymbols == 0 && "Improperly initialized");
   return false;
 }
 
-/// doFinalization - Tear down the state after completion of a module.
-///
-bool MachineModuleInfo::doFinalization() {
+bool MachineModuleInfo::doFinalization(Module &M) {
+
+  Personalities.clear();
+
   delete AddrLabelSymbols;
   AddrLabelSymbols = 0;
+
+  Context.doFinalization();
+
   return false;
 }
 
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 34518fa46b..95d7a7dd68 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -305,8 +305,6 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const {
 #endif
 
 void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
-  assert (!reservedRegsFrozen() &&
-          "freezeReservedRegs should only be called once!");
   ReservedRegs = TRI->getReservedRegs(MF);
   assert(ReservedRegs.size() == TRI->getNumRegs() &&
          "Invalid ReservedRegs vector from target");
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 076547a5ed..1ee8297726 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -13,19 +13,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
 using namespace llvm;
 
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 8d43360e67..c7afa08fcd 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -14,20 +14,19 @@
 
 #define DEBUG_TYPE "misched"
 
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/ScheduleDAGILP.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/PriorityQueue.h"
-
 #include <queue>
 
 using namespace llvm;
@@ -533,7 +532,7 @@ void ScheduleDAGMI::schedule() {
   placeDebugValues();
 
   DEBUG({
-      unsigned BBNum = top()->getParent()->getNumber();
+      unsigned BBNum = begin()->getParent()->getNumber();
       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
       dumpSchedule();
       dbgs() << '\n';
@@ -603,7 +602,11 @@ void ScheduleDAGMI::initQueues() {
 
   SchedImpl->registerRoots();
 
+  // Advance past initial DebugValues.
+  assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
+  TopRPTracker.setPos(CurrentTop);
+
   CurrentBottom = RegionEnd;
 }
 
@@ -674,6 +677,8 @@ void ScheduleDAGMI::placeDebugValues() {
     std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
     MachineInstr *DbgValue = P.first;
     MachineBasicBlock::iterator OrigPrevMI = P.second;
+    if (&*RegionBegin == DbgValue)
+      ++RegionBegin;
     BB->splice(++OrigPrevMI, BB, DbgValue);
     if (OrigPrevMI == llvm::prior(RegionEnd))
       RegionEnd = DbgValue;
@@ -2054,58 +2059,101 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.",
 namespace {
 /// \brief Order nodes by the ILP metric.
 struct ILPOrder {
-  ScheduleDAGILP *ILP;
+  SchedDFSResult *DFSResult;
+  BitVector *ScheduledTrees;
   bool MaximizeILP;
 
-  ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}
+  ILPOrder(SchedDFSResult *dfs, BitVector *schedtrees, bool MaxILP)
+    : DFSResult(dfs), ScheduledTrees(schedtrees), MaximizeILP(MaxILP) {}
 
   /// \brief Apply a less-than relation on node priority.
+  ///
+  /// (Return true if A comes after B in the Q.)
   bool operator()(const SUnit *A, const SUnit *B) const {
-    // Return true if A comes after B in the Q.
+    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
+    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
+    if (SchedTreeA != SchedTreeB) {
+      // Unscheduled trees have lower priority.
+      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
+        return ScheduledTrees->test(SchedTreeB);
+
+      // Trees with shallower connections have have lower priority.
+      if (DFSResult->getSubtreeLevel(SchedTreeA)
+          != DFSResult->getSubtreeLevel(SchedTreeB)) {
+        return DFSResult->getSubtreeLevel(SchedTreeA)
+          < DFSResult->getSubtreeLevel(SchedTreeB);
+      }
+    }
     if (MaximizeILP)
-      return ILP->getILP(A) < ILP->getILP(B);
+      return DFSResult->getILP(A) < DFSResult->getILP(B);
     else
-      return ILP->getILP(A) > ILP->getILP(B);
+      return DFSResult->getILP(A) > DFSResult->getILP(B);
   }
 };
 
 /// \brief Schedule based on the ILP metric.
 class ILPScheduler : public MachineSchedStrategy {
-  ScheduleDAGILP ILP;
+  /// In case all subtrees are eventually connected to a common root through
+  /// data dependence (e.g. reduction), place an upper limit on their size.
+  ///
+  /// FIXME: A subtree limit is generally good, but in the situation commented
+  /// above, where multiple similar subtrees feed a common root, we should
+  /// only split at a point where the resulting subtrees will be balanced.
+  /// (a motivating test case must be found).
+  static const unsigned SubtreeLimit = 16;
+
+  SchedDFSResult DFSResult;
+  BitVector ScheduledTrees;
   ILPOrder Cmp;
 
   std::vector<SUnit*> ReadyQ;
 public:
   ILPScheduler(bool MaximizeILP)
-  : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}
+  : DFSResult(/*BottomUp=*/true, SubtreeLimit),
+    Cmp(&DFSResult, &ScheduledTrees, MaximizeILP) {}
 
   virtual void initialize(ScheduleDAGMI *DAG) {
     ReadyQ.clear();
-    ILP.resize(DAG->SUnits.size());
+    DFSResult.clear();
+    DFSResult.resize(DAG->SUnits.size());
+    ScheduledTrees.clear();
   }
 
   virtual void registerRoots() {
-    for (std::vector<SUnit*>::const_iterator
-           I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
-      ILP.computeILP(*I);
-    }
+    DFSResult.compute(ReadyQ);
+    ScheduledTrees.resize(DFSResult.getNumSubtrees());
+    // Restore the heap in ReadyQ with the updated DFS results.
+    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   }
 
   /// Implement MachineSchedStrategy interface.
   /// -----------------------------------------
 
+  /// Callback to select the highest priority node from the ready Q.
   virtual SUnit *pickNode(bool &IsTopNode) {
     if (ReadyQ.empty()) return NULL;
     pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
     SUnit *SU = ReadyQ.back();
     ReadyQ.pop_back();
     IsTopNode = false;
-    DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
-          << " ILP: " << ILP.getILP(SU) << '\n');
+    DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
+          << *SU->getInstr()
+          << " ILP: " << DFSResult.getILP(SU)
+          << " Tree: " << DFSResult.getSubtreeID(SU) << " @"
+          << DFSResult.getSubtreeLevel(DFSResult.getSubtreeID(SU))<< '\n');
     return SU;
   }
 
-  virtual void schedNode(SUnit *, bool) {}
+  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
+  /// DFSResults, and resort the priority Q.
+  virtual void schedNode(SUnit *SU, bool IsTopNode) {
+    assert(!IsTopNode && "SchedDFSResult needs bottom-up");
+    if (!ScheduledTrees.test(DFSResult.getSubtreeID(SU))) {
+      ScheduledTrees.set(DFSResult.getSubtreeID(SU));
+      DFSResult.scheduleTree(DFSResult.getSubtreeID(SU));
+      std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+    }
+  }
 
   virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
 
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index b117f8c3a2..4dafbe5a3e 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -18,18 +18,18 @@
 
 #define DEBUG_TYPE "machine-sink"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 static cl::opt<bool>
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index 9686b04132..685ccab162 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -9,19 +9,19 @@
 
 #define DEBUG_TYPE "machine-trace-metrics"
 #include "MachineTraceMetrics.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SparseSet.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/SparseSet.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 69a3ae84ec..62856f9ca5 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -23,28 +23,28 @@
 // the verifier errors.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/BasicBlock.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 6da313e632..bf01df905b 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -14,13 +14,13 @@
 
 #define DEBUG_TYPE "phi-opt"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Function.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
 using namespace llvm;
 
 STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index e6e23da27c..827fd8f90f 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -14,23 +14,23 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "phielim"
+#include "llvm/CodeGen/Passes.h"
 #include "PHIEliminationUtils.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Function.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp
index 10bfdcce67..e1b56e962f 100644
--- a/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/lib/CodeGen/PHIEliminationUtils.cpp
@@ -8,10 +8,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "PHIEliminationUtils.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
 using namespace llvm;
 
 // findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 7728cb4d4e..33fad7f8f7 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -12,22 +12,22 @@
 //
 //===---------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/Verifier.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/PassManager.h"
+#include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Transforms/Scalar.h"
 
 using namespace llvm;
 
@@ -410,12 +410,16 @@ void TargetPassConfig::addPassesToHandleExceptions() {
   }
 }
 
-/// Add common passes that perform LLVM IR to IR transforms in preparation for
-/// instruction selection.
-void TargetPassConfig::addISelPrepare() {
+/// Add pass to prepare the LLVM IR for code generation. This should be done
+/// before exception handling preparation passes.
+void TargetPassConfig::addCodeGenPrepare() {
   if (getOptLevel() != CodeGenOpt::None && !DisableCGP)
     addPass(createCodeGenPreparePass(getTargetLowering()));
+}
 
+/// Add common passes that perform LLVM IR to IR transforms in preparation for
+/// instruction selection.
+void TargetPassConfig::addISelPrepare() {
   addPass(createStackProtectorPass(getTargetLowering()));
 
   addPreISel();
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index a795ac8448..cc07d47150 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -53,16 +53,16 @@
 
 #define DEBUG_TYPE "peephole-opt"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 // Optimize Extensions
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index f37fc82b2a..8892e846fa 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -19,12 +19,14 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "post-RA-sched"
-#include "AntiDepBreaker.h"
+#include "llvm/CodeGen/Passes.h"
 #include "AggressiveAntiDepBreaker.h"
+#include "AntiDepBreaker.h"
 #include "CriticalAntiDepBreaker.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -33,18 +35,16 @@
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 STATISTIC(NumNoops, "Number of noops inserted");
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 36c1ae7f72..7dd890c4fb 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -21,25 +21,25 @@
 
 #define DEBUG_TYPE "pei"
 #include "PrologEpilogInserter.h"
-#include "llvm/InlineAsm.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <climits>
 
 using namespace llvm;
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index 0d140a9bb4..87fff9afb3 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -22,11 +22,11 @@
 #ifndef LLVM_CODEGEN_PEI_H
 #define LLVM_CODEGEN_PEI_H
 
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SparseBitVector.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 
 namespace llvm {
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 49599b3ab9..e8af5a3b1d 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -11,14 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
 #include <map>
 using namespace llvm;
 
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 2b598e3a56..c035590357 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -14,14 +14,14 @@
 
 #define DEBUG_TYPE "regalloc"
 #include "RegAllocBase.h"
-#include "LiveRegMatrix.h"
 #include "Spiller.h"
-#include "VirtRegMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #ifndef NDEBUG
@@ -58,6 +58,7 @@ void RegAllocBase::init(VirtRegMap &vrm,
   VRM = &vrm;
   LIS = &lis;
   Matrix = &mat;
+  MRI->freezeReservedRegs(vrm.getMachineFunction());
   RegClassInfo.runOnMachineFunction(vrm.getMachineFunction());
 }
 
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index db0c8e13d3..064e40f06b 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -37,9 +37,9 @@
 #ifndef LLVM_CODEGEN_REGALLOCBASE
 #define LLVM_CODEGEN_REGALLOCBASE
 
-#include "LiveIntervalUnion.h"
-#include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
 
 namespace llvm {
 
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 8a49609552..3053119f4d 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -13,30 +13,29 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
 #include "AllocationOrder.h"
-#include "RegAllocBase.h"
 #include "LiveDebugVariables.h"
+#include "RegAllocBase.h"
 #include "Spiller.h"
-#include "VirtRegMap.h"
-#include "LiveRegMatrix.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/PassAnalysisSupport.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
 #include <cstdlib>
 #include <queue>
 
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 3744b06f3a..4c629993c7 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -13,28 +13,28 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SparseSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -527,10 +527,10 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
     }
   }
 
-  ArrayRef<unsigned> AO = RegClassInfo.getOrder(RC);
+  ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(RC);
 
   // First try to find a completely free register.
-  for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
+  for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
     unsigned PhysReg = *I;
     if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) {
       assignVirtToPhysReg(*LRI, PhysReg);
@@ -542,7 +542,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
                << RC->getName() << "\n");
 
   unsigned BestReg = 0, BestCost = spillImpossible;
-  for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
+  for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){
     unsigned Cost = calcSpillCost(*I);
     DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
     DEBUG(dbgs() << "\tCost: " << Cost << "\n");
@@ -1127,6 +1127,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
   TM = &Fn.getTarget();
   TRI = TM->getRegisterInfo();
   TII = TM->getInstrInfo();
+  MRI->freezeReservedRegs(Fn);
   RegClassInfo.runOnMachineFunction(Fn);
   UsedInInstr.clear();
   UsedInInstr.setUniverse(TRI->getNumRegs());
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 06f69c1e0d..1884452855 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -13,36 +13,35 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
+#include "llvm/CodeGen/Passes.h"
 #include "AllocationOrder.h"
 #include "InterferenceCache.h"
 #include "LiveDebugVariables.h"
-#include "LiveRegMatrix.h"
 #include "RegAllocBase.h"
-#include "Spiller.h"
 #include "SpillPlacement.h"
+#include "Spiller.h"
 #include "SplitKit.h"
-#include "VirtRegMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/PassAnalysisSupport.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/EdgeBundles.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegMatrix.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/PassAnalysisSupport.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Timer.h"
-
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
 #include <queue>
 
 using namespace llvm;
@@ -414,7 +413,7 @@ void RAGreedy::enqueue(LiveInterval *LI) {
     Prio = (1u << 31) + Size;
 
     // Boost ranges that have a physical register hint.
-    if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg)))
+    if (VRM->hasKnownPreference(Reg))
       Prio |= (1u << 30);
   }
 
@@ -443,7 +442,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
   while ((PhysReg = Order.next()))
     if (!Matrix->checkInterference(VirtReg, PhysReg))
       break;
-  if (!PhysReg || Order.isHint(PhysReg))
+  if (!PhysReg || Order.isHint())
     return PhysReg;
 
   // PhysReg is available, but there may be a better choice.
@@ -662,7 +661,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
     BestPhys = PhysReg;
 
     // Stop if the hint can be used.
-    if (Order.isHint(PhysReg))
+    if (Order.isHint())
       break;
   }
 
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index f58d45f131..cdd92afe8a 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -31,24 +31,24 @@
 
 #define DEBUG_TYPE "regalloc"
 
-#include "Spiller.h"
-#include "VirtRegMap.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
 #include "RegisterCoalescer.h"
-#include "llvm/Module.h"
+#include "Spiller.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/RegAllocPBQP.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
 #include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
 #include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -526,7 +526,7 @@ void RegAllocPBQP::finalizeAlloc() const {
          itr != end; ++itr) {
     LiveInterval *li = &lis->getInterval(*itr);
 
-    unsigned physReg = vrm->getRegAllocPref(li->reg);
+    unsigned physReg = mri->getSimpleHint(li->reg);
 
     if (physReg == 0) {
       const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
@@ -552,6 +552,8 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
   vrm = &getAnalysis<VirtRegMap>();
   spiller.reset(createInlineSpiller(*this, MF, *vrm));
 
+  mri->freezeReservedRegs(MF);
+
   DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getName() << "\n");
 
   // Allocator main loop:
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 805d235673..078a0df915 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -18,10 +18,10 @@
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -44,7 +44,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
   }
 
   // Does this MF have different CSRs?
-  const uint16_t *CSR = TRI->getCalleeSavedRegs(MF);
+  const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF);
   if (Update || CSR != CalleeSaved) {
     // Build a CSRNum map. Every CSR alias gets an entry pointing to the last
     // overlapping CSR.
@@ -79,14 +79,14 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
   unsigned NumRegs = RC->getNumRegs();
 
   if (!RCI.Order)
-    RCI.Order.reset(new unsigned[NumRegs]);
+    RCI.Order.reset(new MCPhysReg[NumRegs]);
 
   unsigned N = 0;
-  SmallVector<unsigned, 16> CSRAlias;
+  SmallVector<MCPhysReg, 16> CSRAlias;
 
   // FIXME: Once targets reserve registers instead of removing them from the
   // allocation order, we can simply use begin/end here.
-  ArrayRef<uint16_t> RawOrder = RC->getRawAllocationOrder(*MF);
+  ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF);
   for (unsigned i = 0; i != RawOrder.size(); ++i) {
     unsigned PhysReg = RawOrder[i];
     // Remove reserved registers from the allocation order.
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 05c48c6802..1e1a424514 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -16,36 +16,31 @@
 #define DEBUG_TYPE "regalloc"
 #include "RegisterCoalescer.h"
 #include "LiveDebugVariables.h"
-#include "VirtRegMap.h"
-
-#include "llvm/Pass.h"
-#include "llvm/Value.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Value.h"
 #include <algorithm>
 #include <cmath>
 using namespace llvm;
@@ -891,8 +886,17 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
   // Update LiveDebugVariables.
   LDV->renameRegister(SrcReg, DstReg, SubIdx);
 
+  SmallPtrSet<MachineInstr*, 8> Visited;
   for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
        MachineInstr *UseMI = I.skipInstruction();) {
+    // Each instruction can only be rewritten once because sub-register
+    // composition is not always idempotent. When SrcReg != DstReg, rewriting
+    // the UseMI operands removes them from the SrcReg use-def chain, but when
+    // SrcReg is DstReg we could encounter UseMI twice if it has multiple
+    // operands mentioning the virtual register.
+    if (SrcReg == DstReg && !Visited.insert(UseMI))
+      continue;
+
     SmallVector<unsigned,8> Ops;
     bool Reads, Writes;
     tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 543c426458..62e95aadc0 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -12,25 +12,22 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/RegisterPressure.h"
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
-#include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
-/// Increase register pressure for each set impacted by this register class.
+/// Increase pressure for each pressure set provided by TargetRegisterInfo.
 static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
                                 std::vector<unsigned> &MaxSetPressure,
-                                const TargetRegisterClass *RC,
-                                const TargetRegisterInfo *TRI) {
-  unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
-  for (const int *PSet = TRI->getRegClassPressureSets(RC);
-       *PSet != -1; ++PSet) {
+                                const int *PSet, unsigned Weight) {
+  for (; *PSet != -1; ++PSet) {
     CurrSetPressure[*PSet] += Weight;
     if (&CurrSetPressure != &MaxSetPressure
         && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) {
@@ -39,32 +36,57 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
   }
 }
 
-/// Decrease register pressure for each set impacted by this register class.
+/// Decrease pressure for each pressure set provided by TargetRegisterInfo.
 static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
-                                const TargetRegisterClass *RC,
-                                const TargetRegisterInfo *TRI) {
-  unsigned Weight = TRI->getRegClassWeight(RC).RegWeight;
-  for (const int *PSet = TRI->getRegClassPressureSets(RC);
-       *PSet != -1; ++PSet) {
+                                const int *PSet, unsigned Weight) {
+  for (; *PSet != -1; ++PSet) {
     assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow");
     CurrSetPressure[*PSet] -= Weight;
   }
 }
 
 /// Directly increase pressure only within this RegisterPressure result.
-void RegisterPressure::increase(const TargetRegisterClass *RC,
-                                const TargetRegisterInfo *TRI) {
-  increaseSetPressure(MaxSetPressure, MaxSetPressure, RC, TRI);
+void RegisterPressure::increase(unsigned Reg, const TargetRegisterInfo *TRI,
+                                const MachineRegisterInfo *MRI) {
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+    increaseSetPressure(MaxSetPressure, MaxSetPressure,
+                        TRI->getRegClassPressureSets(RC),
+                        TRI->getRegClassWeight(RC).RegWeight);
+  }
+  else {
+    increaseSetPressure(MaxSetPressure, MaxSetPressure,
+                        TRI->getRegUnitPressureSets(Reg),
+                        TRI->getRegUnitWeight(Reg));
+  }
 }
 
 /// Directly decrease pressure only within this RegisterPressure result.
-void RegisterPressure::decrease(const TargetRegisterClass *RC,
-                                const TargetRegisterInfo *TRI) {
-  decreaseSetPressure(MaxSetPressure, RC, TRI);
+void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI,
+                                const MachineRegisterInfo *MRI) {
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+    decreaseSetPressure(MaxSetPressure, TRI->getRegClassPressureSets(RC),
+                        TRI->getRegClassWeight(RC).RegWeight);
+  }
+  else {
+    decreaseSetPressure(MaxSetPressure, TRI->getRegUnitPressureSets(Reg),
+                        TRI->getRegUnitWeight(Reg));
+  }
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static void dumpSetPressure(const std::vector<unsigned> &SetPressure,
+                            const TargetRegisterInfo *TRI) {
+  for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) {
+    if (SetPressure[i] != 0)
+      dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n';
+  }
+}
+
 void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
+  dbgs() << "Max Pressure: ";
+  dumpSetPressure(MaxSetPressure, TRI);
   dbgs() << "Live In: ";
   for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
     dbgs() << PrintReg(LiveInRegs[i], TRI) << " ";
@@ -73,42 +95,47 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
   for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
     dbgs() << PrintReg(LiveOutRegs[i], TRI) << " ";
   dbgs() << '\n';
-  for (unsigned i = 0, e = MaxSetPressure.size(); i < e; ++i) {
-    if (MaxSetPressure[i] != 0)
-      dbgs() << TRI->getRegPressureSetName(i) << "=" << MaxSetPressure[i]
-             << '\n';
-  }
-}
-#endif
-
-/// Increase the current pressure as impacted by these physical registers and
-/// bump the high water mark if needed.
-void RegPressureTracker::increasePhysRegPressure(ArrayRef<unsigned> Regs) {
-  for (unsigned I = 0, E = Regs.size(); I != E; ++I)
-    increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
-                        TRI->getMinimalPhysRegClass(Regs[I]), TRI);
 }
 
-/// Simply decrease the current pressure as impacted by these physcial
-/// registers.
-void RegPressureTracker::decreasePhysRegPressure(ArrayRef<unsigned> Regs) {
-  for (unsigned I = 0, E = Regs.size(); I != E; ++I)
-    decreaseSetPressure(CurrSetPressure, TRI->getMinimalPhysRegClass(Regs[I]),
-                        TRI);
+void RegPressureTracker::dump() const {
+  dbgs() << "Curr Pressure: ";
+  dumpSetPressure(CurrSetPressure, TRI);
+  P.dump(TRI);
 }
+#endif
 
-/// Increase the current pressure as impacted by these virtual registers and
-/// bump the high water mark if needed.
-void RegPressureTracker::increaseVirtRegPressure(ArrayRef<unsigned> Regs) {
-  for (unsigned I = 0, E = Regs.size(); I != E; ++I)
-    increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
-                        MRI->getRegClass(Regs[I]), TRI);
+/// Increase the current pressure as impacted by these registers and bump
+/// the high water mark if needed.
+void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> Regs) {
+  for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
+    if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
+      const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
+      increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+                          TRI->getRegClassPressureSets(RC),
+                          TRI->getRegClassWeight(RC).RegWeight);
+    }
+    else {
+      increaseSetPressure(CurrSetPressure, P.MaxSetPressure,
+                          TRI->getRegUnitPressureSets(Regs[I]),
+                          TRI->getRegUnitWeight(Regs[I]));
+    }
+  }
 }
 
-/// Simply decrease the current pressure as impacted by these virtual registers.
-void RegPressureTracker::decreaseVirtRegPressure(ArrayRef<unsigned> Regs) {
-  for (unsigned I = 0, E = Regs.size(); I != E; ++I)
-    decreaseSetPressure(CurrSetPressure, MRI->getRegClass(Regs[I]), TRI);
+/// Simply decrease the current pressure as impacted by these registers.
+void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> Regs) {
+  for (unsigned I = 0, E = Regs.size(); I != E; ++I) {
+    if (TargetRegisterInfo::isVirtualRegister(Regs[I])) {
+      const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]);
+      decreaseSetPressure(CurrSetPressure,
+                          TRI->getRegClassPressureSets(RC),
+                          TRI->getRegClassWeight(RC).RegWeight);
+    }
+    else {
+      decreaseSetPressure(CurrSetPressure, TRI->getRegUnitPressureSets(Regs[I]),
+                          TRI->getRegUnitWeight(Regs[I]));
+    }
+  }
 }
 
 /// Clear the result so it can be used for another round of pressure tracking.
@@ -160,6 +187,12 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) {
   LiveInRegs.clear();
 }
 
+const LiveInterval *RegPressureTracker::getInterval(unsigned Reg) const {
+  if (TargetRegisterInfo::isVirtualRegister(Reg))
+    return &LIS->getInterval(Reg);
+  return LIS->getCachedRegUnit(Reg);
+}
+
 /// Setup the RegPressureTracker.
 ///
 /// TODO: Add support for pressure without LiveIntervals.
@@ -181,9 +214,6 @@ void RegPressureTracker::init(const MachineFunction *mf,
   }
 
   CurrPos = pos;
-  while (CurrPos != MBB->end() && CurrPos->isDebugValue())
-    ++CurrPos;
-
   CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0);
 
   if (RequireIntervals)
@@ -192,10 +222,10 @@ void RegPressureTracker::init(const MachineFunction *mf,
     static_cast<RegionPressure&>(P).reset();
   P.MaxSetPressure = CurrSetPressure;
 
-  LivePhysRegs.clear();
-  LivePhysRegs.setUniverse(TRI->getNumRegs());
-  LiveVirtRegs.clear();
-  LiveVirtRegs.setUniverse(MRI->getNumVirtRegs());
+  LiveRegs.PhysRegs.clear();
+  LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs());
+  LiveRegs.VirtRegs.clear();
+  LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs());
 }
 
 /// Does this pressure result have a valid top position and live ins.
@@ -214,19 +244,28 @@ bool RegPressureTracker::isBottomClosed() const {
           MachineBasicBlock::const_iterator());
 }
 
+
+SlotIndex RegPressureTracker::getCurrSlot() const {
+  MachineBasicBlock::const_iterator IdxPos = CurrPos;
+  while (IdxPos != MBB->end() && IdxPos->isDebugValue())
+    ++IdxPos;
+  if (IdxPos == MBB->end())
+    return LIS->getMBBEndIdx(MBB);
+  return LIS->getInstructionIndex(IdxPos).getRegSlot();
+}
+
 /// Set the boundary for the top of the region and summarize live ins.
 void RegPressureTracker::closeTop() {
   if (RequireIntervals)
-    static_cast<IntervalPressure&>(P).TopIdx =
-      LIS->getInstructionIndex(CurrPos).getRegSlot();
+    static_cast<IntervalPressure&>(P).TopIdx = getCurrSlot();
   else
     static_cast<RegionPressure&>(P).TopPos = CurrPos;
 
   assert(P.LiveInRegs.empty() && "inconsistent max pressure result");
-  P.LiveInRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size());
-  P.LiveInRegs.append(LivePhysRegs.begin(), LivePhysRegs.end());
+  P.LiveInRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
+  P.LiveInRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
   for (SparseSet<unsigned>::const_iterator I =
-         LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I)
+         LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
     P.LiveInRegs.push_back(*I);
   std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end());
   P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()),
@@ -236,19 +275,15 @@ void RegPressureTracker::closeTop() {
 /// Set the boundary for the bottom of the region and summarize live outs.
 void RegPressureTracker::closeBottom() {
   if (RequireIntervals)
-    if (CurrPos == MBB->end())
-      static_cast<IntervalPressure&>(P).BottomIdx = LIS->getMBBEndIdx(MBB);
-    else
-      static_cast<IntervalPressure&>(P).BottomIdx =
-        LIS->getInstructionIndex(CurrPos).getRegSlot();
+    static_cast<IntervalPressure&>(P).BottomIdx = getCurrSlot();
   else
     static_cast<RegionPressure&>(P).BottomPos = CurrPos;
 
   assert(P.LiveOutRegs.empty() && "inconsistent max pressure result");
-  P.LiveOutRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size());
-  P.LiveOutRegs.append(LivePhysRegs.begin(), LivePhysRegs.end());
+  P.LiveOutRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size());
+  P.LiveOutRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end());
   for (SparseSet<unsigned>::const_iterator I =
-         LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I)
+         LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I)
     P.LiveOutRegs.push_back(*I);
   std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end());
   P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()),
@@ -258,7 +293,7 @@ void RegPressureTracker::closeBottom() {
 /// Finalize the region boundaries and record live ins and live outs.
 void RegPressureTracker::closeRegion() {
   if (!isTopClosed() && !isBottomClosed()) {
-    assert(LivePhysRegs.empty() && LiveVirtRegs.empty() &&
+    assert(LiveRegs.PhysRegs.empty() && LiveRegs.VirtRegs.empty() &&
            "no region boundary");
     return;
   }
@@ -269,151 +304,98 @@ void RegPressureTracker::closeRegion() {
   // If both top and bottom are closed, do nothing.
 }
 
-/// Return true if Reg aliases a register in Regs SparseSet.
-static bool hasRegAlias(unsigned Reg, SparseSet<unsigned> &Regs,
-                        const TargetRegisterInfo *TRI) {
-  assert(!TargetRegisterInfo::isVirtualRegister(Reg) && "only for physregs");
-  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
-    if (Regs.count(*AI))
-      return true;
-  return false;
-}
-
-/// Return true if Reg aliases a register in unsorted Regs SmallVector.
-/// This is only valid for physical registers.
-static SmallVectorImpl<unsigned>::iterator
-findRegAlias(unsigned Reg, SmallVectorImpl<unsigned> &Regs,
-             const TargetRegisterInfo *TRI) {
-  for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
-    SmallVectorImpl<unsigned>::iterator I =
-      std::find(Regs.begin(), Regs.end(), *AI);
-    if (I != Regs.end())
-      return I;
-  }
-  return Regs.end();
-}
-
-/// Return true if Reg can be inserted into Regs SmallVector. For virtual
-/// register, do a linear search. For physical registers check for aliases.
-static SmallVectorImpl<unsigned>::iterator
-findReg(unsigned Reg, bool isVReg, SmallVectorImpl<unsigned> &Regs,
-        const TargetRegisterInfo *TRI) {
-  if(isVReg)
-    return std::find(Regs.begin(), Regs.end(), Reg);
-  return findRegAlias(Reg, Regs, TRI);
+/// \brief Convenient wrapper for checking membership in RegisterOperands.
+static bool containsReg(ArrayRef<unsigned> Regs, unsigned Reg) {
+  return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end();
 }
 
 /// Collect this instruction's unique uses and defs into SmallVectors for
 /// processing defs and uses in order.
-template<bool isVReg>
-struct RegisterOperands {
+class RegisterOperands {
+  const TargetRegisterInfo *TRI;
+  const MachineRegisterInfo *MRI;
+
+public:
   SmallVector<unsigned, 8> Uses;
   SmallVector<unsigned, 8> Defs;
   SmallVector<unsigned, 8> DeadDefs;
 
+  RegisterOperands(const TargetRegisterInfo *tri,
+                   const MachineRegisterInfo *mri): TRI(tri), MRI(mri) {}
+
   /// Push this operand's register onto the correct vector.
-  void collect(const MachineOperand &MO, const TargetRegisterInfo *TRI) {
-    if (MO.readsReg()) {
-      if (findReg(MO.getReg(), isVReg, Uses, TRI) == Uses.end())
-      Uses.push_back(MO.getReg());
-    }
+  void collect(const MachineOperand &MO) {
+    if (!MO.isReg() || !MO.getReg())
+      return;
+    if (MO.readsReg())
+      pushRegUnits(MO.getReg(), Uses);
     if (MO.isDef()) {
-      if (MO.isDead()) {
-        if (findReg(MO.getReg(), isVReg, DeadDefs, TRI) == DeadDefs.end())
-          DeadDefs.push_back(MO.getReg());
+      if (MO.isDead())
+        pushRegUnits(MO.getReg(), DeadDefs);
+      else
+        pushRegUnits(MO.getReg(), Defs);
+    }
+  }
+
+protected:
+  void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &Regs) {
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      if (containsReg(Regs, Reg))
+        return;
+      Regs.push_back(Reg);
+    }
+    else if (MRI->isAllocatable(Reg)) {
+      for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+        if (containsReg(Regs, *Units))
+          continue;
+        Regs.push_back(*Units);
       }
-      else if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end())
-        Defs.push_back(MO.getReg());
     }
   }
 };
-typedef RegisterOperands<false> PhysRegOperands;
-typedef RegisterOperands<true> VirtRegOperands;
 
 /// Collect physical and virtual register operands.
 static void collectOperands(const MachineInstr *MI,
-                            PhysRegOperands &PhysRegOpers,
-                            VirtRegOperands &VirtRegOpers,
-                            const TargetRegisterInfo *TRI,
-                            const MachineRegisterInfo *MRI) {
-  for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) {
-    const MachineOperand &MO = *OperI;
-    if (!MO.isReg() || !MO.getReg())
-      continue;
+                            RegisterOperands &RegOpers) {
+  for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
+    RegOpers.collect(*OperI);
 
-    if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
-      VirtRegOpers.collect(MO, TRI);
-    else if (MRI->isAllocatable(MO.getReg()))
-      PhysRegOpers.collect(MO, TRI);
-  }
   // Remove redundant physreg dead defs.
-  for (unsigned i = PhysRegOpers.DeadDefs.size(); i > 0; --i) {
-    unsigned Reg = PhysRegOpers.DeadDefs[i-1];
-    if (findRegAlias(Reg, PhysRegOpers.Defs, TRI) != PhysRegOpers.Defs.end())
-      PhysRegOpers.DeadDefs.erase(&PhysRegOpers.DeadDefs[i-1]);
+  for (unsigned i = RegOpers.DeadDefs.size(); i > 0; --i) {
+    unsigned Reg = RegOpers.DeadDefs[i-1];
+    if (containsReg(RegOpers.Defs, Reg))
+      RegOpers.DeadDefs.erase(&RegOpers.DeadDefs[i-1]);
   }
 }
 
 /// Force liveness of registers.
 void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
   for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
-    if (TargetRegisterInfo::isVirtualRegister(Regs[i])) {
-      if (LiveVirtRegs.insert(Regs[i]).second)
-        increaseVirtRegPressure(Regs[i]);
-    }
-    else  {
-      if (!hasRegAlias(Regs[i], LivePhysRegs, TRI)) {
-        LivePhysRegs.insert(Regs[i]);
-        increasePhysRegPressure(Regs[i]);
-      }
-    }
+    if (LiveRegs.insert(Regs[i]))
+      increaseRegPressure(Regs[i]);
   }
 }
 
-/// Add PhysReg to the live in set and increase max pressure.
-void RegPressureTracker::discoverPhysLiveIn(unsigned Reg) {
-  assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice");
-  if (findRegAlias(Reg, P.LiveInRegs, TRI) != P.LiveInRegs.end())
-    return;
-
-  // At live in discovery, unconditionally increase the high water mark.
-  P.LiveInRegs.push_back(Reg);
-  P.increase(TRI->getMinimalPhysRegClass(Reg), TRI);
-}
-
-/// Add PhysReg to the live out set and increase max pressure.
-void RegPressureTracker::discoverPhysLiveOut(unsigned Reg) {
-  assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice");
-  if (findRegAlias(Reg, P.LiveOutRegs, TRI) != P.LiveOutRegs.end())
-    return;
-
-  // At live out discovery, unconditionally increase the high water mark.
-  P.LiveOutRegs.push_back(Reg);
-  P.increase(TRI->getMinimalPhysRegClass(Reg), TRI);
-}
-
-/// Add VirtReg to the live in set and increase max pressure.
-void RegPressureTracker::discoverVirtLiveIn(unsigned Reg) {
-  assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice");
-  if (std::find(P.LiveInRegs.begin(), P.LiveInRegs.end(), Reg) !=
-      P.LiveInRegs.end())
+/// Add Reg to the live in set and increase max pressure.
+void RegPressureTracker::discoverLiveIn(unsigned Reg) {
+  assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice");
+  if (containsReg(P.LiveInRegs, Reg))
     return;
 
   // At live in discovery, unconditionally increase the high water mark.
   P.LiveInRegs.push_back(Reg);
-  P.increase(MRI->getRegClass(Reg), TRI);
+  P.increase(Reg, TRI, MRI);
 }
 
-/// Add VirtReg to the live out set and increase max pressure.
-void RegPressureTracker::discoverVirtLiveOut(unsigned Reg) {
-  assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice");
-  if (std::find(P.LiveOutRegs.begin(), P.LiveOutRegs.end(), Reg) !=
-      P.LiveOutRegs.end())
+/// Add Reg to the live out set and increase max pressure.
+void RegPressureTracker::discoverLiveOut(unsigned Reg) {
+  assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice");
+  if (containsReg(P.LiveOutRegs, Reg))
     return;
 
   // At live out discovery, unconditionally increase the high water mark.
   P.LiveOutRegs.push_back(Reg);
-  P.increase(MRI->getRegClass(Reg), TRI);
+  P.increase(Reg, TRI, MRI);
 }
 
 /// Recede across the previous instruction.
@@ -447,52 +429,35 @@ bool RegPressureTracker::recede() {
   if (RequireIntervals && isTopClosed())
     static_cast<IntervalPressure&>(P).openTop(SlotIdx);
 
-  PhysRegOperands PhysRegOpers;
-  VirtRegOperands VirtRegOpers;
-  collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI);
+  RegisterOperands RegOpers(TRI, MRI);
+  collectOperands(CurrPos, RegOpers);
 
   // Boost pressure for all dead defs together.
-  increasePhysRegPressure(PhysRegOpers.DeadDefs);
-  increaseVirtRegPressure(VirtRegOpers.DeadDefs);
-  decreasePhysRegPressure(PhysRegOpers.DeadDefs);
-  decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+  increaseRegPressure(RegOpers.DeadDefs);
+  decreaseRegPressure(RegOpers.DeadDefs);
 
   // Kill liveness at live defs.
   // TODO: consider earlyclobbers?
-  for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
-    unsigned Reg = PhysRegOpers.Defs[i];
-    if (LivePhysRegs.erase(Reg))
-      decreasePhysRegPressure(Reg);
-    else
-      discoverPhysLiveOut(Reg);
-  }
-  for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
-    unsigned Reg = VirtRegOpers.Defs[i];
-    if (LiveVirtRegs.erase(Reg))
-      decreaseVirtRegPressure(Reg);
+  for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+    unsigned Reg = RegOpers.Defs[i];
+    if (LiveRegs.erase(Reg))
+      decreaseRegPressure(Reg);
     else
-      discoverVirtLiveOut(Reg);
+      discoverLiveOut(Reg);
   }
 
   // Generate liveness for uses.
-  for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
-    unsigned Reg = PhysRegOpers.Uses[i];
-    if (!hasRegAlias(Reg, LivePhysRegs, TRI)) {
-      increasePhysRegPressure(Reg);
-      LivePhysRegs.insert(Reg);
-    }
-  }
-  for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
-    unsigned Reg = VirtRegOpers.Uses[i];
-    if (!LiveVirtRegs.count(Reg)) {
+  for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+    unsigned Reg = RegOpers.Uses[i];
+    if (!LiveRegs.contains(Reg)) {
       // Adjust liveouts if LiveIntervals are available.
       if (RequireIntervals) {
-        const LiveInterval *LI = &LIS->getInterval(Reg);
-        if (!LI->killedAt(SlotIdx))
-          discoverVirtLiveOut(Reg);
+        const LiveInterval *LI = getInterval(Reg);
+        if (LI && !LI->killedAt(SlotIdx))
+          discoverLiveOut(Reg);
       }
-      increaseVirtRegPressure(Reg);
-      LiveVirtRegs.insert(Reg);
+      increaseRegPressure(Reg);
+      LiveRegs.insert(Reg);
     }
   }
   return true;
@@ -510,7 +475,7 @@ bool RegPressureTracker::advance() {
 
   SlotIndex SlotIdx;
   if (RequireIntervals)
-    SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+    SlotIdx = getCurrSlot();
 
   // Open the bottom of the region using slot indexes.
   if (isBottomClosed()) {
@@ -520,57 +485,43 @@ bool RegPressureTracker::advance() {
       static_cast<RegionPressure&>(P).openBottom(CurrPos);
   }
 
-  PhysRegOperands PhysRegOpers;
-  VirtRegOperands VirtRegOpers;
-  collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI);
-
-  // Kill liveness at last uses.
-  for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
-    unsigned Reg = PhysRegOpers.Uses[i];
-    if (!hasRegAlias(Reg, LivePhysRegs, TRI))
-      discoverPhysLiveIn(Reg);
-    else {
-      // Allocatable physregs are always single-use before regalloc.
-      decreasePhysRegPressure(Reg);
-      LivePhysRegs.erase(Reg);
-    }
-  }
-  for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
-    unsigned Reg = VirtRegOpers.Uses[i];
+  RegisterOperands RegOpers(TRI, MRI);
+  collectOperands(CurrPos, RegOpers);
+
+  for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+    unsigned Reg = RegOpers.Uses[i];
+    // Discover live-ins.
+    bool isLive = LiveRegs.contains(Reg);
+    if (!isLive)
+      discoverLiveIn(Reg);
+    // Kill liveness at last uses.
+    bool lastUse = false;
     if (RequireIntervals) {
-      const LiveInterval *LI = &LIS->getInterval(Reg);
-      if (LI->killedAt(SlotIdx)) {
-        if (LiveVirtRegs.erase(Reg))
-          decreaseVirtRegPressure(Reg);
-        else
-          discoverVirtLiveIn(Reg);
-      }
+      const LiveInterval *LI = getInterval(Reg);
+      lastUse = LI && LI->killedAt(SlotIdx);
     }
-    else if (!LiveVirtRegs.count(Reg)) {
-      discoverVirtLiveIn(Reg);
-      increaseVirtRegPressure(Reg);
+    else {
+      // Allocatable physregs are always single-use before register rewriting.
+      lastUse = !TargetRegisterInfo::isVirtualRegister(Reg);
     }
+    if (lastUse && isLive) {
+      LiveRegs.erase(Reg);
+      decreaseRegPressure(Reg);
+    }
+    else if (!lastUse && !isLive)
+      increaseRegPressure(Reg);
   }
 
   // Generate liveness for defs.
-  for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
-    unsigned Reg = PhysRegOpers.Defs[i];
-    if (!hasRegAlias(Reg, LivePhysRegs, TRI)) {
-      increasePhysRegPressure(Reg);
-      LivePhysRegs.insert(Reg);
-    }
-  }
-  for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
-    unsigned Reg = VirtRegOpers.Defs[i];
-    if (LiveVirtRegs.insert(Reg).second)
-      increaseVirtRegPressure(Reg);
+  for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+    unsigned Reg = RegOpers.Defs[i];
+    if (LiveRegs.insert(Reg))
+      increaseRegPressure(Reg);
   }
 
   // Boost pressure for all dead defs together.
-  increasePhysRegPressure(PhysRegOpers.DeadDefs);
-  increaseVirtRegPressure(VirtRegOpers.DeadDefs);
-  decreasePhysRegPressure(PhysRegOpers.DeadDefs);
-  decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+  increaseRegPressure(RegOpers.DeadDefs);
+  decreaseRegPressure(RegOpers.DeadDefs);
 
   // Find the next instruction.
   do
@@ -661,39 +612,28 @@ static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec,
 /// This is intended for speculative queries. It leaves pressure inconsistent
 /// with the current position, so must be restored by the caller.
 void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) {
+  assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
   // Account for register pressure similar to RegPressureTracker::recede().
-  PhysRegOperands PhysRegOpers;
-  VirtRegOperands VirtRegOpers;
-  collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI);
+  RegisterOperands RegOpers(TRI, MRI);
+  collectOperands(MI, RegOpers);
 
   // Boost max pressure for all dead defs together.
   // Since CurrSetPressure and MaxSetPressure
-  increasePhysRegPressure(PhysRegOpers.DeadDefs);
-  increaseVirtRegPressure(VirtRegOpers.DeadDefs);
-  decreasePhysRegPressure(PhysRegOpers.DeadDefs);
-  decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+  increaseRegPressure(RegOpers.DeadDefs);
+  decreaseRegPressure(RegOpers.DeadDefs);
 
   // Kill liveness at live defs.
-  for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) {
-    unsigned Reg = PhysRegOpers.Defs[i];
-    if (!findReg(Reg, false, PhysRegOpers.Uses, TRI))
-      decreasePhysRegPressure(PhysRegOpers.Defs);
-  }
-  for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) {
-    unsigned Reg = VirtRegOpers.Defs[i];
-    if (!findReg(Reg, true, VirtRegOpers.Uses, TRI))
-      decreaseVirtRegPressure(VirtRegOpers.Defs);
+  for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) {
+    unsigned Reg = RegOpers.Defs[i];
+    if (!containsReg(RegOpers.Uses, Reg))
+      decreaseRegPressure(Reg);
   }
   // Generate liveness for uses.
-  for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) {
-    unsigned Reg = PhysRegOpers.Uses[i];
-    if (!hasRegAlias(Reg, LivePhysRegs, TRI))
-      increasePhysRegPressure(Reg);
-  }
-  for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
-    unsigned Reg = VirtRegOpers.Uses[i];
-    if (!LiveVirtRegs.count(Reg))
-      increaseVirtRegPressure(Reg);
+  for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+    unsigned Reg = RegOpers.Uses[i];
+    if (!LiveRegs.contains(Reg))
+      increaseRegPressure(Reg);
   }
 }
 
@@ -740,6 +680,8 @@ static bool findUseBetween(unsigned Reg,
          UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end();
          UI != UE; UI.skipInstruction()) {
       const MachineInstr* MI = &*UI;
+      if (MI->isDebugValue())
+        continue;
       SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
       if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx)
         return true;
@@ -754,38 +696,42 @@ static bool findUseBetween(unsigned Reg,
 /// This is intended for speculative queries. It leaves pressure inconsistent
 /// with the current position, so must be restored by the caller.
 void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
+  assert(!MI->isDebugValue() && "Expect a nondebug instruction.");
+
   // Account for register pressure similar to RegPressureTracker::recede().
-  PhysRegOperands PhysRegOpers;
-  VirtRegOperands VirtRegOpers;
-  collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI);
+  RegisterOperands RegOpers(TRI, MRI);
+  collectOperands(MI, RegOpers);
 
   // Kill liveness at last uses. Assume allocatable physregs are single-use
   // rather than checking LiveIntervals.
-  decreasePhysRegPressure(PhysRegOpers.Uses);
-  if (RequireIntervals) {
-    SlotIndex SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
-    for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) {
-      unsigned Reg = VirtRegOpers.Uses[i];
-      const LiveInterval *LI = &LIS->getInterval(Reg);
-      // FIXME: allow the caller to pass in the list of vreg uses that remain to
-      // be bottom-scheduled to avoid searching uses at each query.
-      SlotIndex CurrIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
-      if (LI->killedAt(SlotIdx)
+  SlotIndex SlotIdx;
+  if (RequireIntervals)
+    SlotIdx = LIS->getInstructionIndex(MI).getRegSlot();
+
+  for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) {
+    unsigned Reg = RegOpers.Uses[i];
+    if (RequireIntervals) {
+      // FIXME: allow the caller to pass in the list of vreg uses that remain
+      // to be bottom-scheduled to avoid searching uses at each query.
+      SlotIndex CurrIdx = getCurrSlot();
+      const LiveInterval *LI = getInterval(Reg);
+      if (LI && LI->killedAt(SlotIdx)
           && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) {
-        decreaseVirtRegPressure(Reg);
+        decreaseRegPressure(Reg);
       }
     }
+    else if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+      // Allocatable physregs are always single-use before register rewriting.
+      decreaseRegPressure(Reg);
+    }
   }
 
   // Generate liveness for defs.
-  increasePhysRegPressure(PhysRegOpers.Defs);
-  increaseVirtRegPressure(VirtRegOpers.Defs);
+  increaseRegPressure(RegOpers.Defs);
 
   // Boost pressure for all dead defs together.
-  increasePhysRegPressure(PhysRegOpers.DeadDefs);
-  increaseVirtRegPressure(VirtRegOpers.DeadDefs);
-  decreasePhysRegPressure(PhysRegOpers.DeadDefs);
-  decreaseVirtRegPressure(VirtRegOpers.DeadDefs);
+  increaseRegPressure(RegOpers.DeadDefs);
+  decreaseRegPressure(RegOpers.DeadDefs);
 }
 
 /// Consider the pressure increase caused by traversing this instruction
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index c85ccd05fa..88f67da9cc 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -16,17 +16,17 @@
 
 #define DEBUG_TYPE "reg-scavenging"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 /// setUsed - Set the register and its sub-registers as being used.
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 0c50db8d34..e639c55a04 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -16,12 +16,12 @@
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <climits>
 using namespace llvm;
 
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 683011d3de..ef33b12367 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -12,8 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "sched-instrs"
-#include "llvm/Operator.h"
+#define DEBUG_TYPE "misched"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -22,20 +25,17 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/RegisterPressure.h"
-#include "llvm/CodeGen/ScheduleDAGILP.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDFS.h"
 #include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
@@ -67,7 +67,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
       // regular getUnderlyingObjectFromInt.
       if (U->getOpcode() == Instruction::PtrToInt)
         return U->getOperand(0);
-      // If we find an add of a constant or a multiplied value, it's
+      // If we find an add of a constant, a multiplied value, or a phi, it's
       // likely that the other operand will lead us to the base
       // object. We don't have to worry about the case where the
       // object address is somehow being computed by the multiply,
@@ -75,7 +75,8 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
       // identifiable object.
       if (U->getOpcode() != Instruction::Add ||
           (!isa<ConstantInt>(U->getOperand(1)) &&
-           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
+           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
+           !isa<PHINode>(U->getOperand(1))))
         return V;
       V = U->getOperand(0);
     } else {
@@ -85,56 +86,77 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
   } while (1);
 }
 
-/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
+/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects
 /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
-static const Value *getUnderlyingObject(const Value *V) {
-  // First just call Value::getUnderlyingObject to let it do what it does.
+static void getUnderlyingObjects(const Value *V,
+                                 SmallVectorImpl<Value *> &Objects) {
+  SmallPtrSet<const Value*, 16> Visited;
+  SmallVector<const Value *, 4> Working(1, V);
   do {
-    V = GetUnderlyingObject(V);
-    // If it found an inttoptr, use special code to continue climing.
-    if (Operator::getOpcode(V) != Instruction::IntToPtr)
-      break;
-    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
-    // If that succeeded in finding a pointer, continue the search.
-    if (!O->getType()->isPointerTy())
-      break;
-    V = O;
-  } while (1);
-  return V;
+    V = Working.pop_back_val();
+
+    SmallVector<Value *, 4> Objs;
+    GetUnderlyingObjects(const_cast<Value *>(V), Objs);
+
+    for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+         I != IE; ++I) {
+      V = *I;
+      if (!Visited.insert(V))
+        continue;
+      if (Operator::getOpcode(V) == Instruction::IntToPtr) {
+        const Value *O =
+          getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
+        if (O->getType()->isPointerTy()) {
+          Working.push_back(O);
+          continue;
+        }
+      }
+      Objects.push_back(const_cast<Value *>(V));
+    }
+  } while (!Working.empty());
 }
 
-/// getUnderlyingObjectForInstr - If this machine instr has memory reference
+/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
 /// information and it can be tracked to a normal reference to a known
-/// object, return the Value for that object. Otherwise return null.
-static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
-                                                const MachineFrameInfo *MFI,
-                                                bool &MayAlias) {
-  MayAlias = true;
+/// object, return the Value for that object.
+static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
+              const MachineFrameInfo *MFI,
+              SmallVectorImpl<std::pair<const Value *, bool> > &Objects) {
   if (!MI->hasOneMemOperand() ||
       !(*MI->memoperands_begin())->getValue() ||
       (*MI->memoperands_begin())->isVolatile())
-    return 0;
+    return;
 
   const Value *V = (*MI->memoperands_begin())->getValue();
   if (!V)
-    return 0;
-
-  V = getUnderlyingObject(V);
-  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
-    // For now, ignore PseudoSourceValues which may alias LLVM IR values
-    // because the code that uses this function has no way to cope with
-    // such aliases.
-    if (PSV->isAliased(MFI))
-      return 0;
-
-    MayAlias = PSV->mayAlias(MFI);
-    return V;
-  }
+    return;
+
+  SmallVector<Value *, 4> Objs;
+  getUnderlyingObjects(V, Objs);
+
+  for (SmallVector<Value *, 4>::iterator I = Objs.begin(), IE = Objs.end();
+       I != IE; ++I) {
+    bool MayAlias = true;
+    V = *I;
+
+    if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+      // For now, ignore PseudoSourceValues which may alias LLVM IR values
+      // because the code that uses this function has no way to cope with
+      // such aliases.
+
+      if (PSV->isAliased(MFI)) {
+        Objects.clear();
+        return;
+      }
 
-  if (isIdentifiedObject(V))
-    return V;
+      MayAlias = PSV->mayAlias(MFI);
+    } else if (!isIdentifiedObject(V)) {
+      Objects.clear();
+      return;
+    }
 
-  return 0;
+    Objects.push_back(std::make_pair(V, MayAlias));
+  }
 }
 
 void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
@@ -209,7 +231,8 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
         Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1));
       else {
         assert(!IsPostRA && "Virtual register encountered after regalloc.");
-        addVRegUseDeps(&ExitSU, i);
+        if (MO.readsReg()) // ignore undef operands
+          addVRegUseDeps(&ExitSU, i);
       }
     }
   } else {
@@ -451,23 +474,29 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
   if ((*MI->memoperands_begin())->isVolatile() ||
        MI->hasUnmodeledSideEffects())
     return true;
-
   const Value *V = (*MI->memoperands_begin())->getValue();
   if (!V)
     return true;
 
-  V = getUnderlyingObject(V);
-  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
-    // Similarly to getUnderlyingObjectForInstr:
-    // For now, ignore PseudoSourceValues which may alias LLVM IR values
-    // because the code that uses this function has no way to cope with
-    // such aliases.
-    if (PSV->isAliased(MFI))
+  SmallVector<Value *, 4> Objs;
+  getUnderlyingObjects(V, Objs);
+  for (SmallVector<Value *, 4>::iterator I = Objs.begin(),
+       IE = Objs.end(); I != IE; ++I) {
+    V = *I;
+
+    if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+      // Similarly to getUnderlyingObjectForInstr:
+      // For now, ignore PseudoSourceValues which may alias LLVM IR values
+      // because the code that uses this function has no way to cope with
+      // such aliases.
+      if (PSV->isAliased(MFI))
+        return true;
+    }
+
+    // Does this pointer refer to a distinct and identifiable object?
+    if (!isIdentifiedObject(V))
       return true;
   }
-  // Does this pointer refer to a distinct and identifiable object?
-  if (!isIdentifiedObject(V))
-    return true;
 
   return false;
 }
@@ -711,17 +740,17 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
   addSchedBarrierDeps();
 
   // Walk the list of instructions, from bottom moving up.
-  MachineInstr *PrevMI = NULL;
+  MachineInstr *DbgMI = NULL;
   for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
        MII != MIE; --MII) {
     MachineInstr *MI = prior(MII);
-    if (MI && PrevMI) {
-      DbgValues.push_back(std::make_pair(PrevMI, MI));
-      PrevMI = NULL;
+    if (MI && DbgMI) {
+      DbgValues.push_back(std::make_pair(DbgMI, MI));
+      DbgMI = NULL;
     }
 
     if (MI->isDebugValue()) {
-      PrevMI = MI;
+      DbgMI = MI;
       continue;
     }
     if (RPTracker) {
@@ -819,59 +848,70 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
       AliasMemDefs.clear();
       AliasMemUses.clear();
     } else if (MI->mayStore()) {
-      bool MayAlias = true;
-      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+      SmallVector<std::pair<const Value *, bool>, 4> Objs;
+      getUnderlyingObjectsForInstr(MI, MFI, Objs);
+
+      if (Objs.empty()) {
+        // Treat all other stores conservatively.
+        goto new_alias_chain;
+      }
+
+      bool MayAlias = false;
+      for (SmallVector<std::pair<const Value *, bool>, 4>::iterator
+           K = Objs.begin(), KE = Objs.end(); K != KE; ++K) {
+        const Value *V = K->first;
+        bool ThisMayAlias = K->second;
+        if (ThisMayAlias)
+          MayAlias = true;
+
         // A store to a specific PseudoSourceValue. Add precise dependencies.
         // Record the def in MemDefs, first adding a dep if there is
         // an existing def.
         MapVector<const Value *, SUnit *>::iterator I =
-          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+          ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
         MapVector<const Value *, SUnit *>::iterator IE =
-          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+          ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
         if (I != IE) {
           addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
           I->second = SU;
         } else {
-          if (MayAlias)
+          if (ThisMayAlias)
             AliasMemDefs[V] = SU;
           else
             NonAliasMemDefs[V] = SU;
         }
         // Handle the uses in MemUses, if there are any.
         MapVector<const Value *, std::vector<SUnit *> >::iterator J =
-          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+          ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
         MapVector<const Value *, std::vector<SUnit *> >::iterator JE =
-          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+          ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
         if (J != JE) {
           for (unsigned i = 0, e = J->second.size(); i != e; ++i)
             addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes,
                                TrueMemOrderLatency, true);
           J->second.clear();
         }
-        if (MayAlias) {
-          // Add dependencies from all the PendingLoads, i.e. loads
-          // with no underlying object.
-          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
-            addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
-                               TrueMemOrderLatency);
-          // Add dependence on alias chain, if needed.
-          if (AliasChain)
-            addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
-          // But we also should check dependent instructions for the
-          // SU in question.
-          adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
-                          TrueMemOrderLatency);
-        }
-        // Add dependence on barrier chain, if needed.
-        // There is no point to check aliasing on barrier event. Even if
-        // SU and barrier _could_ be reordered, they should not. In addition,
-        // we have lost all RejectMemNodes below barrier.
-        if (BarrierChain)
-          BarrierChain->addPred(SDep(SU, SDep::Barrier));
-      } else {
-        // Treat all other stores conservatively.
-        goto new_alias_chain;
       }
+      if (MayAlias) {
+        // Add dependencies from all the PendingLoads, i.e. loads
+        // with no underlying object.
+        for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
+          addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes,
+                             TrueMemOrderLatency);
+        // Add dependence on alias chain, if needed.
+        if (AliasChain)
+          addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes);
+        // But we also should check dependent instructions for the
+        // SU in question.
+        adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes,
+                        TrueMemOrderLatency);
+      }
+      // Add dependence on barrier chain, if needed.
+      // There is no point to check aliasing on barrier event. Even if
+      // SU and barrier _could_ be reordered, they should not. In addition,
+      // we have lost all RejectMemNodes below barrier.
+      if (BarrierChain)
+        BarrierChain->addPred(SDep(SU, SDep::Barrier));
 
       if (!ExitSU.isPred(SU))
         // Push store's up a bit to avoid them getting in between cmp
@@ -882,20 +922,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
       if (MI->isInvariantLoad(AA)) {
         // Invariant load, no chain dependencies needed!
       } else {
-        if (const Value *V =
-            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
-          // A load from a specific PseudoSourceValue. Add precise dependencies.
-          MapVector<const Value *, SUnit *>::iterator I =
-            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
-          MapVector<const Value *, SUnit *>::iterator IE =
-            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
-          if (I != IE)
-            addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
-          if (MayAlias)
-            AliasMemUses[V].push_back(SU);
-          else
-            NonAliasMemUses[V].push_back(SU);
-        } else {
+        SmallVector<std::pair<const Value *, bool>, 4> Objs;
+        getUnderlyingObjectsForInstr(MI, MFI, Objs);
+
+        if (Objs.empty()) {
           // A load with no underlying object. Depend on all
           // potentially aliasing stores.
           for (MapVector<const Value *, SUnit *>::iterator I =
@@ -904,6 +934,29 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
 
           PendingLoads.push_back(SU);
           MayAlias = true;
+        } else {
+          MayAlias = false;
+        }
+
+        for (SmallVector<std::pair<const Value *, bool>, 4>::iterator
+             J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
+          const Value *V = J->first;
+          bool ThisMayAlias = J->second;
+
+          if (ThisMayAlias)
+            MayAlias = true;
+
+          // A load from a specific PseudoSourceValue. Add precise dependencies.
+          MapVector<const Value *, SUnit *>::iterator I =
+            ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+          MapVector<const Value *, SUnit *>::iterator IE =
+            ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+          if (I != IE)
+            addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true);
+          if (ThisMayAlias)
+            AliasMemUses[V].push_back(SU);
+          else
+            NonAliasMemUses[V].push_back(SU);
         }
         if (MayAlias)
           adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0);
@@ -915,8 +968,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
       }
     }
   }
-  if (PrevMI)
-    FirstDbgValue = PrevMI;
+  if (DbgMI)
+    FirstDbgValue = DbgMI;
 
   Defs.clear();
   Uses.clear();
@@ -948,6 +1001,120 @@ std::string ScheduleDAGInstrs::getDAGName() const {
   return "dag." + BB->getFullName();
 }
 
+//===----------------------------------------------------------------------===//
+// SchedDFSResult Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+/// \brief Internal state used to compute SchedDFSResult.
+class SchedDFSImpl {
+  SchedDFSResult &R;
+
+  /// Join DAG nodes into equivalence classes by their subtree.
+  IntEqClasses SubtreeClasses;
+  /// List PredSU, SuccSU pairs that represent data edges between subtrees.
+  std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
+
+public:
+  SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSData.size()) {}
+
+  /// SubtreID is initialized to zero, set to itself to flag the root of a
+  /// subtree, set to the parent to indicate an interior node,
+  /// then set to a representative subtree ID during finalization.
+  bool isVisited(const SUnit *SU) const {
+    return R.DFSData[SU->NodeNum].SubtreeID;
+  }
+
+  /// Initialize this node's instruction count. We don't need to flag the node
+  /// visited until visitPostorder because the DAG cannot have cycles.
+  void visitPreorder(const SUnit *SU) {
+    R.DFSData[SU->NodeNum].InstrCount = SU->getInstr()->isTransient() ? 0 : 1;
+  }
+
+  /// Mark this node as either the root of a subtree or an interior
+  /// node. Increment the parent node's instruction count.
+  void visitPostorder(const SUnit *SU, const SDep *PredDep, const SUnit *Parent) {
+    R.DFSData[SU->NodeNum].SubtreeID = SU->NodeNum;
+
+    // Join the child to its parent if they are connected via data dependence
+    // and do not exceed the limit.
+    if (!Parent || PredDep->getKind() != SDep::Data)
+      return;
+
+    unsigned PredCnt = R.DFSData[SU->NodeNum].InstrCount;
+    if (PredCnt > R.SubtreeLimit)
+      return;
+
+    R.DFSData[SU->NodeNum].SubtreeID = Parent->NodeNum;
+
+    // Add the recently finished predecessor's bottom-up descendent count.
+    R.DFSData[Parent->NodeNum].InstrCount += PredCnt;
+    SubtreeClasses.join(Parent->NodeNum, SU->NodeNum);
+  }
+
+  /// Determine whether the DFS cross edge should be considered a subtree edge
+  /// or a connection between subtrees.
+  void visitCross(const SDep &PredDep, const SUnit *Succ) {
+    if (PredDep.getKind() == SDep::Data) {
+      // If this is a cross edge to a root, join the subtrees. This happens when
+      // the root was first reached by a non-data dependence.
+      unsigned NodeNum = PredDep.getSUnit()->NodeNum;
+      unsigned PredCnt = R.DFSData[NodeNum].InstrCount;
+      if (R.DFSData[NodeNum].SubtreeID == NodeNum && PredCnt < R.SubtreeLimit) {
+        R.DFSData[NodeNum].SubtreeID = Succ->NodeNum;
+        R.DFSData[Succ->NodeNum].InstrCount += PredCnt;
+        SubtreeClasses.join(Succ->NodeNum, NodeNum);
+        return;
+      }
+    }
+    ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ));
+  }
+
+  /// Set each node's subtree ID to the representative ID and record connections
+  /// between trees.
+  void finalize() {
+    SubtreeClasses.compress();
+    R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
+    R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
+    DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
+    for (unsigned Idx = 0, End = R.DFSData.size(); Idx != End; ++Idx) {
+      R.DFSData[Idx].SubtreeID = SubtreeClasses[Idx];
+      DEBUG(dbgs() << "  SU(" << Idx << ") in tree "
+            << R.DFSData[Idx].SubtreeID << '\n');
+    }
+    for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
+           I = ConnectionPairs.begin(), E = ConnectionPairs.end();
+         I != E; ++I) {
+      unsigned PredTree = SubtreeClasses[I->first->NodeNum];
+      unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
+      if (PredTree == SuccTree)
+        continue;
+      unsigned Depth = I->first->getDepth();
+      addConnection(PredTree, SuccTree, Depth);
+      addConnection(SuccTree, PredTree, Depth);
+    }
+  }
+
+protected:
+  /// Called by finalize() to record a connection between trees.
+  void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
+    if (!Depth)
+      return;
+
+    SmallVectorImpl<SchedDFSResult::Connection> &Connections =
+      R.SubtreeConnections[FromTree];
+    for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
+           I = Connections.begin(), E = Connections.end(); I != E; ++I) {
+      if (I->TreeID == ToTree) {
+        I->Level = std::max(I->Level, Depth);
+        return;
+      }
+    }
+    Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
+  }
+};
+} // namespace llvm
+
 namespace {
 /// \brief Manage the stack used by a reverse depth-first search over the DAG.
 class SchedDAGReverseDFS {
@@ -960,7 +1127,10 @@ public:
   }
   void advance() { ++DFSStack.back().second; }
 
-  void backtrack() { DFSStack.pop_back(); }
+  const SDep *backtrack() {
+    DFSStack.pop_back();
+    return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second);
+  }
 
   const SUnit *getCurr() const { return DFSStack.back().first; }
 
@@ -972,57 +1142,65 @@ public:
 };
 } // anonymous
 
-void ScheduleDAGILP::resize(unsigned NumSUnits) {
-  ILPValues.resize(NumSUnits);
-}
-
-ILPValue ScheduleDAGILP::getILP(const SUnit *SU) {
-  return ILPValues[SU->NodeNum];
-}
-
-// A leaf node has an ILP of 1/1.
-static ILPValue initILP(const SUnit *SU) {
-  unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1;
-  return ILPValue(Cnt, 1 + SU->getDepth());
-}
-
 /// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
 /// search from this root.
-void ScheduleDAGILP::computeILP(const SUnit *Root) {
+void SchedDFSResult::compute(ArrayRef<SUnit *> Roots) {
   if (!IsBottomUp)
     llvm_unreachable("Top-down ILP metric is unimplemnted");
 
-  SchedDAGReverseDFS DFS;
-  // Mark a node visited by validating it.
-  ILPValues[Root->NodeNum] = initILP(Root);
-  DFS.follow(Root);
-  for (;;) {
-    // Traverse the leftmost path as far as possible.
-    while (DFS.getPred() != DFS.getPredEnd()) {
-      const SUnit *PredSU = DFS.getPred()->getSUnit();
-      DFS.advance();
-      // If the pred is already valid, skip it.
-      if (ILPValues[PredSU->NodeNum].isValid())
-        continue;
-      ILPValues[PredSU->NodeNum] = initILP(PredSU);
-      DFS.follow(PredSU);
+  SchedDFSImpl Impl(*this);
+  for (ArrayRef<const SUnit*>::const_iterator
+         RootI = Roots.begin(), RootE = Roots.end(); RootI != RootE; ++RootI) {
+    SchedDAGReverseDFS DFS;
+    Impl.visitPreorder(*RootI);
+    DFS.follow(*RootI);
+    for (;;) {
+      // Traverse the leftmost path as far as possible.
+      while (DFS.getPred() != DFS.getPredEnd()) {
+        const SDep &PredDep = *DFS.getPred();
+        DFS.advance();
+        // If the pred is already valid, skip it. We may preorder visit a node
+        // with InstrCount==0 more than once, but it won't affect heuristics
+        // because we don't care about cross edges to leaf copies.
+        if (Impl.isVisited(PredDep.getSUnit())) {
+          Impl.visitCross(PredDep, DFS.getCurr());
+          continue;
+        }
+        Impl.visitPreorder(PredDep.getSUnit());
+        DFS.follow(PredDep.getSUnit());
+      }
+      // Visit the top of the stack in postorder and backtrack.
+      const SUnit *Child = DFS.getCurr();
+      const SDep *PredDep = DFS.backtrack();
+      Impl.visitPostorder(Child, PredDep, PredDep ? DFS.getCurr() : 0);
+      if (DFS.isComplete())
+        break;
     }
-    // Visit the top of the stack in postorder and backtrack.
-    unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
-    DFS.backtrack();
-    if (DFS.isComplete())
-      break;
-    // Add the recently finished predecessor's bottom-up descendent count.
-    ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
+  }
+  Impl.finalize();
+}
+
+/// The root of the given SubtreeID was just scheduled. For all subtrees
+/// connected to this tree, record the depth of the connection so that the
+/// nearest connected subtrees can be prioritized.
+void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
+  for (SmallVectorImpl<Connection>::const_iterator
+         I = SubtreeConnections[SubtreeID].begin(),
+         E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
+    SubtreeConnectLevels[I->TreeID] =
+      std::max(SubtreeConnectLevels[I->TreeID], I->Level);
+    DEBUG(dbgs() << "  Tree: " << I->TreeID
+          << " @" << SubtreeConnectLevels[I->TreeID] << '\n');
   }
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void ILPValue::print(raw_ostream &OS) const {
-  if (!isValid())
+  OS << InstrCount << " / " << Length << " = ";
+  if (!Length)
     OS << "BADILP";
-  OS << InstrCount << " / " << Cycles << " = "
-     << format("%g", ((double)InstrCount / Cycles));
+  else
+    OS << format("%g", ((double)InstrCount / Length));
 }
 
 void ILPValue::dump() const {
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 6e781b199a..950a62fac4 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -11,19 +11,19 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Constants.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Constants.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <fstream>
 using namespace llvm;
 
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 37d7731aa1..1c28d6dcaf 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18,22 +18,22 @@
 
 #define DEBUG_TYPE "dagcombine"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -291,6 +291,10 @@ namespace {
                  unsigned SrcValueAlign2,
                  const MDNode *TBAAInfo2) const;
 
+    /// isAlias - Return true if there is any possibility that the two addresses
+    /// overlap.
+    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);
+
     /// FindAliasInfo - Extracts the relevant alias information from the memory
     /// node.  Returns true if the operand was a load.
     bool FindAliasInfo(SDNode *N,
@@ -1377,6 +1381,12 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (add x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
   }
 
   // fold (add x, undef) -> undef
@@ -1620,6 +1630,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (sub x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
   }
 
   // fold (sub x, x) -> 0
@@ -2423,6 +2437,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (and x, 0) -> 0, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N1;
+
+    // fold (and x, -1) -> x, vector edition
+    if (ISD::isBuildVectorAllOnes(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllOnes(N1.getNode()))
+      return N0;
   }
 
   // fold (and x, undef) -> 0
@@ -2766,7 +2792,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       }
     }
   }
-      
 
   return SDValue();
 }
@@ -3021,6 +3046,18 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (or x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
+
+    // fold (or x, -1) -> -1, vector edition
+    if (ISD::isBuildVectorAllOnes(N0.getNode()))
+      return N0;
+    if (ISD::isBuildVectorAllOnes(N1.getNode()))
+      return N1;
   }
 
   // fold (or x, undef) -> -1
@@ -3330,6 +3367,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    // fold (xor x, 0) -> x, vector edition
+    if (ISD::isBuildVectorAllZeros(N0.getNode()))
+      return N1;
+    if (ISD::isBuildVectorAllZeros(N1.getNode()))
+      return N0;
   }
 
   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
@@ -5025,11 +5068,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
       // At this point, we must have a load or else we can't do the transform.
       if (!isa<LoadSDNode>(N0)) return SDValue();
 
+      // Because a SRL must be assumed to *need* to zero-extend the high bits
+      // (as opposed to anyext the high bits), we can't combine the zextload
+      // lowering of SRL and an sextload.
+      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
+        return SDValue();
+
       // If the shift amount is larger than the input type then we're not
       // accessing any of the loaded bytes.  If the load was a zextload/extload
       // then the result of the shift+trunc is zero/undef (handled elsewhere).
-      // If the load was a sextload then the result is a splat of the sign bit
-      // of the extended byte.  This is not worth optimizing for.
       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
         return SDValue();
     }
@@ -7552,7 +7599,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
   if (BasePtr.first.getOpcode() == ISD::UNDEF)
     return false;
 
+  // Save the LoadSDNodes that we find in the chain.
+  // We need to make sure that these nodes do not interfere with
+  // any of the store nodes.
+  SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
+
+  // Save the StoreSDNodes that we find in the chain.
   SmallVector<MemOpLink, 8> StoreNodes;
+
   // Walk up the chain and look for nodes with offsets from the same
   // base pointer. Stop when reaching an instruction with a different kind
   // or instruction which has a different base pointer.
@@ -7596,8 +7650,26 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
     // We found a potential memory operand to merge.
     StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++));
 
-    // Move up the chain to the next memory operation.
-    Index = dyn_cast<StoreSDNode>(Index->getChain().getNode());
+    // Find the next memory operand in the chain. If the next operand in the
+    // chain is a store then move up and continue the scan with the next
+    // memory operand. If the next operand is a load save it and use alias
+    // information to check if it interferes with anything.
+    SDNode *NextInChain = Index->getChain().getNode();
+    while (1) {
+      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
+        // We found a store node. Use it for the next iteration.
+        Index = STn;
+        break;
+      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+        // Save the load node for later. Continue the scan.
+        AliasLoadNodes.push_back(Ldn);
+        NextInChain = Ldn->getChain().getNode();
+        continue;
+      } else {
+        Index = NULL;
+        break;
+      }
+    }
   }
 
   // Check if there is anything to merge.
@@ -7612,9 +7684,25 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
   // store memory address.
   unsigned LastConsecutiveStore = 0;
   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
-  for (unsigned i=1; i<StoreNodes.size(); ++i) {
-    int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
-    if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+  for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
+
+    // Check that the addresses are consecutive starting from the second
+    // element in the list of stores.
+    if (i > 0) {
+      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
+      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
+        break;
+    }
+
+    bool Alias = false;
+    // Check if this store interferes with any of the loads that we found.
+    for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
+      if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
+        Alias = true;
+        break;
+      }
+    // We found a load that alias with this store. Stop the sequence.
+    if (Alias)
       break;
 
     // Mark this node as useful.
@@ -8116,8 +8204,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
 
   // Only perform this optimization before the types are legal, because we
   // don't want to perform this optimization on every DAGCombine invocation.
-  if (!LegalTypes && MergeConsecutiveStores(ST))
-    return SDValue(N, 0);
+  if (!LegalTypes) {
+    bool EverChanged = false;
+
+    do {
+      // There can be multiple store sequences on the same chain.
+      // Keep trying to merge store sequences until we are unable to do so
+      // or until we merge the last store on the chain.
+      bool Changed = MergeConsecutiveStores(ST);
+      EverChanged |= Changed;
+      if (!Changed) break;
+    } while (ST->getOpcode() != ISD::DELETED_NODE);
+
+    if (EverChanged)
+      return SDValue(N, 0);
+  }
 
   return ReduceLoadOpStoreWidth(N);
 }
@@ -9680,6 +9781,23 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
   return true;
 }
 
+bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
+  SDValue Ptr0, Ptr1;
+  int64_t Size0, Size1;
+  const Value *SrcValue0, *SrcValue1;
+  int SrcValueOffset0, SrcValueOffset1;
+  unsigned SrcValueAlign0, SrcValueAlign1;
+  const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
+  FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0,
+                SrcValueAlign0, SrcTBAAInfo0);
+  FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1,
+                SrcValueAlign1, SrcTBAAInfo1);
+  return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0,
+                 SrcValueAlign0, SrcTBAAInfo0,
+                 Ptr1, Size1, SrcValue1, SrcValueOffset1,
+                 SrcValueAlign1, SrcTBAAInfo1);
+}
+
 /// FindAliasInfo - Extracts the relevant alias information from the memory
 /// node.  Returns true if the operand was a load.
 bool DAGCombiner::FindAliasInfo(SDNode *N,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4854cf7b26..e900c6b603 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -40,27 +40,27 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "isel"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
-#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/Loads.h"
 #include "llvm/DataLayout.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index a4182906cb..8799effe20 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,8 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "function-lowering-info"
-#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
@@ -22,20 +29,13 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index a8381b25ba..ae10609db1 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -16,18 +16,18 @@
 #define DEBUG_TYPE "instr-emitter"
 #include "InstrEmitter.h"
 #include "SDNodeDbgValue.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 /// MinRCSize - Smallest register class we allow when constraining virtual
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 9eddee9e33..9bfb51db8c 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -16,9 +16,9 @@
 #ifndef INSTREMITTER_H
 #define INSTREMITTER_H
 
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 
 namespace llvm {
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index abf40b77a1..2c249fcaf9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,26 +11,26 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Constants.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -731,9 +731,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
           return;
         }
         case TargetLowering::Promote: {
-          assert(VT.isVector() && "Unknown legal promote case!");
-          Value = DAG.getNode(ISD::BITCAST, dl,
-                             TLI.getTypeToPromoteTo(ISD::STORE, VT), Value);
+          EVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
+          assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+                 "Can only promote stores to same size type");
+          Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
           SDValue Result =
             DAG.getStore(Chain, dl, Value, Ptr,
                          ST->getPointerInfo(), isVolatile,
@@ -889,10 +890,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
       break;
     }
     case TargetLowering::Promote: {
-      // Only promote a load of vector type to another.
-      assert(VT.isVector() && "Cannot promote this load!");
-      // Change base type to a different vector type.
       EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+      assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+             "Can only promote loads to same size type");
 
       SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
                          LD->isVolatile(), LD->isNonTemporal(),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 644e36e35e..c8b3a6c2a6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -14,9 +14,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "LegalizeTypes.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/CallingConv.h"
 #include "llvm/DataLayout.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 20b7ce6b15..8c53ba38e3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -17,12 +17,12 @@
 #define SELECTIONDAG_LEGALIZETYPES_H
 
 #define DEBUG_TYPE "legalize-types"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
 
@@ -578,6 +578,7 @@ private:
 
   // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
   bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+  SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo);
   SDValue SplitVecOp_UnaryOp(SDNode *N);
 
   SDValue SplitVecOp_BITCAST(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index d51a6eb192..1700ce8b1e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1030,7 +1030,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
     case ISD::STORE:
       Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
       break;
-
+    case ISD::VSELECT:
+      Res = SplitVecOp_VSELECT(N, OpNo);
+      break;
     case ISD::CTTZ:
     case ISD::CTLZ:
     case ISD::CTPOP:
@@ -1064,6 +1066,58 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
   return false;
 }
 
+SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
+  // The only possibility for an illegal operand is the mask, since result type
+  // legalization would have handled this node already otherwise.
+  assert(OpNo == 0 && "Illegal operand must be mask");
+
+  SDValue Mask = N->getOperand(0);
+  SDValue Src0 = N->getOperand(1);
+  SDValue Src1 = N->getOperand(2);
+  DebugLoc DL = N->getDebugLoc();
+  EVT MaskVT = Mask.getValueType();
+  assert(MaskVT.isVector() && "VSELECT without a vector mask?");
+
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  assert(Lo.getValueType() == Hi.getValueType() &&
+         "Lo and Hi have differing types");;
+
+  unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+  unsigned HiNumElts = Hi.getValueType().getVectorNumElements();
+  assert(LoNumElts == HiNumElts && "Asymmetric vector split?");
+
+  LLVMContext &Ctx = *DAG.getContext();
+  SDValue Zero = DAG.getIntPtrConstant(0);
+  SDValue LoElts = DAG.getIntPtrConstant(LoNumElts);
+  EVT Src0VT = Src0.getValueType();
+  EVT Src0EltTy = Src0VT.getVectorElementType();
+  EVT MaskEltTy = MaskVT.getVectorElementType();
+
+  EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts);
+  EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts);
+  EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts);
+  EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts);
+
+  SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero);
+  SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero);
+
+  SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts);
+  SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts);
+
+  SDValue LoMask =
+    DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero);
+  SDValue HiMask =
+    DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts);
+
+  SDValue LoSelect =
+    DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
+  SDValue HiSelect =
+    DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
+}
+
 SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
   // The result has a legal vector type, but the input needs splitting.
   EVT ResVT = N->getValueType(0);
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index c3794d5f78..a9b6a2eca8 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -21,13 +21,13 @@
 
 #define DEBUG_TYPE "scheduler"
 #include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -604,10 +604,8 @@ SUnit *ResourcePriorityQueue::pop() {
   std::vector<SUnit *>::iterator Best = Queue.begin();
   if (!DisableDFASched) {
     signed BestCost = SUSchedulingCost(*Best);
-    for (std::vector<SUnit *>::iterator I = Queue.begin(),
+    for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
            E = Queue.end(); I != E; ++I) {
-      if (*I == *Best)
-        continue;
 
       if (SUSchedulingCost(*I) > BestCost) {
         BestCost = SUSchedulingCost(*I);
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 2dcb229573..4af7172847 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -15,8 +15,8 @@
 #define LLVM_CODEGEN_SDNODEDBGVALUE_H
 
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/DebugLoc.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
 
 namespace llvm {
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 2ecdd89415..b7c47db58e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -12,20 +12,20 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "pre-RA-sched"
-#include "ScheduleDAGSDNodes.h"
-#include "InstrEmitter.h"
-#include "llvm/InlineAsm.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
+#include "InstrEmitter.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 STATISTIC(NumUnfolds,    "Number of nodes unfolded");
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index dc8f0ee4a2..bab0c2764a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -16,22 +16,22 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "pre-RA-sched"
-#include "ScheduleDAGSDNodes.h"
-#include "llvm/InlineAsm.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/DataLayout.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <climits>
 using namespace llvm;
 
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index a197fcbfa5..057450de2b 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -13,26 +13,26 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "pre-RA-sched"
-#include "SDNodeDbgValue.h"
 #include "ScheduleDAGSDNodes.h"
 #include "InstrEmitter.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "SDNodeDbgValue.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 using namespace llvm;
 
 STATISTIC(LoadsClustered, "Number of loads clustered together");
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index f8ca7b1d40..7ddc6e8650 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -19,19 +19,19 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/ResourcePriorityQueue.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/ResourcePriorityQueue.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <climits>
 using namespace llvm;
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f000ce38d3..29339405aa 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -12,42 +12,42 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "SDNodeOrdering.h"
 #include "SDNodeDbgValue.h"
+#include "SDNodeOrdering.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalAlias.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Intrinsics.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Mutex.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
 #include <algorithm>
 #include <cmath>
 using namespace llvm;
@@ -3373,7 +3373,7 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
   unsigned NumVTBytes = VT.getSizeInBits() / 8;
   unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size()));
 
-  uint64_t Val = 0;
+  APInt Val(NumBytes*8, 0);
   if (TLI.isLittleEndian()) {
     for (unsigned i = 0; i != NumBytes; ++i)
       Val |= (uint64_t)(unsigned char)Str[i] << i*8;
@@ -3382,7 +3382,9 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
       Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8;
   }
 
-  return DAG.getConstant(Val, VT);
+  if (TLI.isIntImmLegal(Val, VT))
+    return DAG.getConstant(Val, VT);
+  return SDValue(0, 0);
 }
 
 /// getMemBasePlusOffset - Returns base and offset node for the
@@ -3422,6 +3424,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                      unsigned DstAlign, unsigned SrcAlign,
                                      bool IsZeroVal,
                                      bool MemcpyStrSrc,
+                                     bool AllowOverlap,
                                      SelectionDAG &DAG,
                                      const TargetLowering &TLI) {
   assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
@@ -3461,24 +3464,47 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
 
   unsigned NumMemOps = 0;
   while (Size != 0) {
+    if (++NumMemOps > Limit)
+      return false;
+
     unsigned VTSize = VT.getSizeInBits() / 8;
     while (VTSize > Size) {
       // For now, only use non-vector load / store's for the left-over pieces.
+      EVT NewVT;
+      unsigned NewVTSize;
       if (VT.isVector() || VT.isFloatingPoint()) {
-        VT = MVT::i64;
-        while (!TLI.isTypeLegal(VT))
-          VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
-        VTSize = VT.getSizeInBits() / 8;
+        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+        while (!TLI.isOperationLegalOrCustom(ISD::STORE, NewVT)) {
+          if (NewVT == MVT::i64 &&
+              TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64)) {
+            // i64 is usually not legal on 32-bit targets, but f64 may be.
+            NewVT = MVT::f64;
+            break;
+          }
+          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+        }
+        NewVTSize = NewVT.getSizeInBits() / 8;
       } else {
         // This can result in a type that is not legal on the target, e.g.
         // 1 or 2 bytes on PPC.
-        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
-        VTSize >>= 1;
+        NewVT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+        NewVTSize = VTSize >> 1;
+      }
+
+      // If the new VT cannot cover all of the remaining bits, then consider
+      // issuing a (or a pair of) unaligned and overlapping load / store.
+      // FIXME: Only does this for 64-bit or more since we don't have proper
+      // cost model for unaligned load / store.
+      bool Fast;
+      if (AllowOverlap && VTSize >= 8 && NewVTSize < Size &&
+          TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast)
+        VTSize = Size;
+      else {
+        VT = NewVT;
+        VTSize = NewVTSize;
       }
     }
 
-    if (++NumMemOps > Limit)
-      return false;
     MemOps.push_back(VT);
     Size -= VTSize;
   }
@@ -3523,7 +3549,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
   if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
                                 (DstAlignCanChange ? 0 : Align),
                                 (isZeroStr ? 0 : SrcAlign),
-                                true, CopyFromStr, DAG, TLI))
+                                true, CopyFromStr, true, DAG, TLI))
     return SDValue();
 
   if (DstAlignCanChange) {
@@ -3545,6 +3571,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
     unsigned VTSize = VT.getSizeInBits() / 8;
     SDValue Value, Store;
 
+    if (VTSize > Size) {
+      // Issuing an unaligned load / store pair  that overlaps with the previous
+      // pair. Adjust the offset accordingly.
+      assert(i == NumMemOps-1 && i != 0);
+      SrcOff -= VTSize - Size;
+      DstOff -= VTSize - Size;
+    }
+
     if (CopyFromStr &&
         (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
       // It's unlikely a store of a vector immediate can be done in a single
@@ -3553,11 +3587,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
       // FIXME: Handle other cases where store of vector immediate is done in
       // a single instruction.
       Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff));
-      Store = DAG.getStore(Chain, dl, Value,
-                           getMemBasePlusOffset(Dst, DstOff, DAG),
-                           DstPtrInfo.getWithOffset(DstOff), isVol,
-                           false, Align);
-    } else {
+      if (Value.getNode())
+        Store = DAG.getStore(Chain, dl, Value,
+                             getMemBasePlusOffset(Dst, DstOff, DAG),
+                             DstPtrInfo.getWithOffset(DstOff), isVol,
+                             false, Align);
+    }
+
+    if (!Store.getNode()) {
       // The type might not be legal for the target.  This should only happen
       // if the type is smaller than a legal type, as on PPC, so the right
       // thing to do is generate a LoadExt/StoreTrunc pair.  These simplify
@@ -3577,6 +3614,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
     OutChains.push_back(Store);
     SrcOff += VTSize;
     DstOff += VTSize;
+    Size -= VTSize;
   }
 
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -3613,7 +3651,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
 
   if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
                                 (DstAlignCanChange ? 0 : Align),
-                                SrcAlign, true, false, DAG, TLI))
+                                SrcAlign, true, false, false, DAG, TLI))
     return SDValue();
 
   if (DstAlignCanChange) {
@@ -3689,7 +3727,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
     isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
   if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
                                 Size, (DstAlignCanChange ? 0 : Align), 0,
-                                IsZeroVal, false, DAG, TLI))
+                                IsZeroVal, false, true, DAG, TLI))
     return SDValue();
 
   if (DstAlignCanChange) {
@@ -3716,6 +3754,13 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
 
   for (unsigned i = 0; i < NumMemOps; i++) {
     EVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    if (VTSize > Size) {
+      // Issuing an unaligned load / store pair  that overlaps with the previous
+      // pair. Adjust the offset accordingly.
+      assert(i == NumMemOps-1 && i != 0);
+      DstOff -= VTSize - Size;
+    }
 
     // If this store is smaller than the largest store see whether we can get
     // the smaller value for free with a truncate.
@@ -3734,6 +3779,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
                                  isVol, false, Align);
     OutChains.push_back(Store);
     DstOff += VT.getSizeInBits() / 8;
+    Size -= VTSize;
   }
 
   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 649b1c4897..26a79c075c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -12,51 +12,51 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "isel"
-#include "SDNodeDbgValue.h"
 #include "SelectionDAGBuilder.h"
+#include "SDNodeDbgValue.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Constants.h"
 #include "llvm/CallingConv.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Constants.h"
 #include "llvm/DataLayout.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IntegersSubsetMapping.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/IntegersSubsetMapping.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -769,9 +769,11 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
     EVT ValueVT = ValueVTs[Value];
     unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
     EVT RegisterVT = RegVTs[Value];
+    ISD::NodeType ExtendKind =
+      TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND;
 
     getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
-                   &Parts[Part], NumParts, RegisterVT, V);
+                   &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
     Part += NumParts;
   }
 
@@ -5293,11 +5295,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
       !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
     isTailCall = false;
 
-  // If there's a possibility that fast-isel has already selected some amount
-  // of the current basic block, don't emit a tail call.
-  if (isTailCall && TM.Options.EnableFastISel)
-    isTailCall = false;
-
   TargetLowering::
   CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG,
                        getCurDebugLoc(), CS);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 5818c09f29..7bb1bb1fea 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -14,12 +14,12 @@
 #ifndef SELECTIONDAGBUILDER_H
 #define SELECTIONDAGBUILDER_H
 
-#include "llvm/Constants.h"
-#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Constants.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <vector>
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index a870ee2ac8..def18a203e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -11,23 +11,23 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/SelectionDAG.h"
 #include "ScheduleDAGSDNodes.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 
 std::string SDNode::getOperationName(const SelectionDAG *G) const {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 193f0e7d09..29942a6575 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -12,23 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "isel"
+#include "llvm/CodeGen/SelectionDAGISel.h"
 #include "ScheduleDAGSDNodes.h"
 #include "SelectionDAGBuilder.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -37,23 +31,29 @@
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Timer.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/Statistic.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -998,12 +998,12 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
 
       if (AllPredsVisited) {
         for (BasicBlock::const_iterator I = LLVMBB->begin();
-             isa<PHINode>(I); ++I)
-          FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I));
+             const PHINode *PN = dyn_cast<PHINode>(I); ++I)
+          FuncInfo->ComputePHILiveOutRegInfo(PN);
       } else {
         for (BasicBlock::const_iterator I = LLVMBB->begin();
-             isa<PHINode>(I); ++I)
-          FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I));
+             const PHINode *PN = dyn_cast<PHINode>(I); ++I)
+          FuncInfo->InvalidatePHILiveOutRegInfo(PN);
       }
 
       FuncInfo->VisitedBBs.insert(LLVMBB);
@@ -1105,19 +1105,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
           }
 
           bool HadTailCall = false;
+          MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt;
           SelectBasicBlock(Inst, BI, HadTailCall);
 
-          // Recompute NumFastIselRemaining as Selection DAG instruction
-          // selection may have handled the call, input args, etc.
-          unsigned RemainingNow = std::distance(Begin, BI);
-          NumFastIselFailures += NumFastIselRemaining - RemainingNow;
-
           // If the call was emitted as a tail call, we're done with the block.
+          // We also need to delete any previously emitted instructions.
           if (HadTailCall) {
+            FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end());
             --BI;
             break;
           }
 
+          // Recompute NumFastIselRemaining as Selection DAG instruction
+          // selection may have handled the call, input args, etc.
+          unsigned RemainingNow = std::distance(Begin, BI);
+          NumFastIselFailures += NumFastIselRemaining - RemainingNow;
           NumFastIselRemaining = RemainingNow;
           continue;
         }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 3921635652..ac6ddc1533 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -11,21 +11,21 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/SelectionDAG.h"
 #include "ScheduleDAGSDNodes.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 namespace llvm {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 794935dad5..35f1931494 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12,24 +12,24 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetLowering.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <cctype>
 using namespace llvm;
 
@@ -825,7 +825,7 @@ void TargetLowering::computeRegisterProperties() {
     // that wider vector type.
     EVT EltVT = VT.getVectorElementType();
     unsigned NElts = VT.getVectorNumElements();
-    if (NElts != 1) {
+    if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) {
       bool IsLegalWiderType = false;
       // First try to promote the elements of integer vectors. If no legal
       // promotion was found, fallback to the widen-vector method.
@@ -3324,7 +3324,7 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
 SDValue TargetLowering::
 BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
-          std::vector<SDNode*>* Created) const {
+          std::vector<SDNode*> *Created) const {
   EVT VT = N->getValueType(0);
   DebugLoc dl= N->getDebugLoc();
 
@@ -3384,7 +3384,7 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
 SDValue TargetLowering::
 BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
-          std::vector<SDNode*>* Created) const {
+          std::vector<SDNode*> *Created) const {
   EVT VT = N->getValueType(0);
   DebugLoc dl = N->getDebugLoc();
 
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 8a6b120f97..d8f63ca4a9 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -26,12 +26,12 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/GCStrategy.h"
-#include "llvm/CodeGen/GCs.h"
 #include "llvm/Support/CallSite.h"
 
 using namespace llvm;
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index 4fbe1b3605..217f088fa3 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -35,22 +35,22 @@
 #define DEBUG_TYPE "shrink-wrap"
 
 #include "PrologEpilogInserter.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <sstream>
 
 using namespace llvm;
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 4b566fcba9..ed30ac12ce 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -13,7 +13,14 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "sjljehprepare"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Instructions.h"
@@ -21,16 +28,9 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 4cd22eb60f..209792fd40 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -10,7 +10,6 @@
 #define DEBUG_TYPE "spiller"
 
 #include "Spiller.h"
-#include "VirtRegMap.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -19,12 +18,13 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index dca15ee758..0a3818e43f 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -14,7 +14,6 @@
 
 #define DEBUG_TYPE "regalloc"
 #include "SplitKit.h"
-#include "VirtRegMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
@@ -22,6 +21,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index e306a2f2c2..de365cc1ff 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -22,39 +22,39 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "stackcoloring"
+#include "llvm/CodeGen/Passes.h"
 #include "MachineTraceMetrics.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SparseSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/DebugInfo.h"
+#include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 31e9ec0ac0..a4a0861b89 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -16,9 +16,11 @@
 
 #define DEBUG_TYPE "stack-protector"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Attributes.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
@@ -26,10 +28,8 @@
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/Triple.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index d349abc357..0130f4d934 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -12,8 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "stackslotcoloring"
-#include "llvm/Module.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -22,14 +25,11 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 #include <vector>
 using namespace llvm;
 
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 39fd600d4a..b337c53933 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -39,17 +39,17 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "strongphielim"
-#include "PHIEliminationUtils.h"
 #include "llvm/CodeGen/Passes.h"
+#include "PHIEliminationUtils.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 1497d1ba62..d20a8811a2 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -13,25 +13,25 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "tailduplication"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineSSAUpdater.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Function.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 STATISTIC(NumTails     , "Number of tails duplicated");
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index cadb87815d..883e9d1846 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -11,12 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-
 #include <cstdlib>
 using namespace llvm;
 
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 433f2ea061..d5fbf14d27 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===//
+//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,39 +7,96 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements the TargetInstrInfoImpl class, it just provides default
-// implementations of various methods.
+// This file implements the TargetInstrInfo class.
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cctype>
 using namespace llvm;
 
 static cl::opt<bool> DisableHazardRecognizer(
   "disable-sched-hazard", cl::Hidden, cl::init(false),
   cl::desc("Disable hazard detection during preRA scheduling"));
 
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+                             const TargetRegisterInfo *TRI,
+                             const MachineFunction &MF) const {
+  if (OpNum >= MCID.getNumOperands())
+    return 0;
+
+  short RegClass = MCID.OpInfo[OpNum].RegClass;
+  if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
+    return TRI->getPointerRegClass(MF, RegClass);
+
+  // Instructions like INSERT_SUBREG do not have fixed register classes.
+  if (RegClass < 0)
+    return 0;
+
+  // Otherwise just look it up normally.
+  return TRI->getRegClass(RegClass);
+}
+
+/// insertNoop - Insert a noop into the instruction stream at the specified
+/// point.
+void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI) const {
+  llvm_unreachable("Target didn't implement insertNoop!");
+}
+
+/// Measure the specified inline asm to determine an approximation of its
+/// length.
+/// Comments (which run till the next SeparatorString or newline) do not
+/// count as an instruction.
+/// Any other non-whitespace text is considered an instruction, with
+/// multiple instructions separated by SeparatorString or newlines.
+/// Variable-length instructions are not handled here; this function
+/// may be overloaded in the target code to do that.
+unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
+                                             const MCAsmInfo &MAI) const {
+
+
+  // Count the number of instructions in the asm.
+  bool atInsnStart = true;
+  unsigned Length = 0;
+  for (; *Str; ++Str) {
+    if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+                                strlen(MAI.getSeparatorString())) == 0)
+      atInsnStart = true;
+    if (atInsnStart && !std::isspace(*Str)) {
+      Length += MAI.getMaxInstLength();
+      atInsnStart = false;
+    }
+    if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+                               strlen(MAI.getCommentString())) == 0)
+      atInsnStart = false;
+  }
+
+  return Length;
+}
+
 /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
 /// after it, replacing it with an unconditional branch to NewDest.
 void
-TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
-                                             MachineBasicBlock *NewDest) const {
+TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+                                         MachineBasicBlock *NewDest) const {
   MachineBasicBlock *MBB = Tail->getParent();
 
   // Remove all the old successors of MBB from the CFG.
@@ -58,8 +115,8 @@ TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
 
 // commuteInstruction - The default implementation of this method just exchanges
 // the two operands returned by findCommutedOpIndices.
-MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
-                                                      bool NewMI) const {
+MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
+                                                  bool NewMI) const {
   const MCInstrDesc &MCID = MI->getDesc();
   bool HasDef = MCID.getNumDefs();
   if (HasDef && !MI->getOperand(0).isReg())
@@ -119,11 +176,11 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
 /// findCommutedOpIndices - If specified MI is commutable, return the two
 /// operand indices that would swap value. Return true if the instruction
 /// is not in a form which this routine understands.
-bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
-                                                unsigned &SrcOpIdx1,
-                                                unsigned &SrcOpIdx2) const {
+bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI,
+                                            unsigned &SrcOpIdx1,
+                                            unsigned &SrcOpIdx2) const {
   assert(!MI->isBundle() &&
-         "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles");
+         "TargetInstrInfo::findCommutedOpIndices() can't handle bundles");
 
   const MCInstrDesc &MCID = MI->getDesc();
   if (!MCID.isCommutable())
@@ -141,7 +198,7 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
 
 
 bool
-TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const {
+TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
   if (!MI->isTerminator()) return false;
 
   // Conditional branch is a special case.
@@ -153,12 +210,12 @@ TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const {
 }
 
 
-bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
+bool TargetInstrInfo::PredicateInstruction(MachineInstr *MI,
                             const SmallVectorImpl<MachineOperand> &Pred) const {
   bool MadeChange = false;
 
   assert(!MI->isBundle() &&
-         "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles");
+         "TargetInstrInfo::PredicateInstruction() can't handle bundles");
 
   const MCInstrDesc &MCID = MI->getDesc();
   if (!MI->isPredicable())
@@ -183,9 +240,9 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
   return MadeChange;
 }
 
-bool TargetInstrInfoImpl::hasLoadFromStackSlot(const MachineInstr *MI,
-                                        const MachineMemOperand *&MMO,
-                                        int &FrameIndex) const {
+bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+                                           const MachineMemOperand *&MMO,
+                                           int &FrameIndex) const {
   for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
          oe = MI->memoperands_end();
        o != oe;
@@ -201,9 +258,9 @@ bool TargetInstrInfoImpl::hasLoadFromStackSlot(const MachineInstr *MI,
   return false;
 }
 
-bool TargetInstrInfoImpl::hasStoreToStackSlot(const MachineInstr *MI,
-                                       const MachineMemOperand *&MMO,
-                                       int &FrameIndex) const {
+bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+                                          const MachineMemOperand *&MMO,
+                                          int &FrameIndex) const {
   for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
          oe = MI->memoperands_end();
        o != oe;
@@ -219,26 +276,26 @@ bool TargetInstrInfoImpl::hasStoreToStackSlot(const MachineInstr *MI,
   return false;
 }
 
-void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator I,
-                                        unsigned DestReg,
-                                        unsigned SubIdx,
-                                        const MachineInstr *Orig,
-                                        const TargetRegisterInfo &TRI) const {
+void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator I,
+                                    unsigned DestReg,
+                                    unsigned SubIdx,
+                                    const MachineInstr *Orig,
+                                    const TargetRegisterInfo &TRI) const {
   MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
   MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
   MBB.insert(I, MI);
 }
 
 bool
-TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
-                                      const MachineInstr *MI1,
-                                      const MachineRegisterInfo *MRI) const {
+TargetInstrInfo::produceSameValue(const MachineInstr *MI0,
+                                  const MachineInstr *MI1,
+                                  const MachineRegisterInfo *MRI) const {
   return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
 }
 
-MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
-                                             MachineFunction &MF) const {
+MachineInstr *TargetInstrInfo::duplicate(MachineInstr *Orig,
+                                         MachineFunction &MF) const {
   assert(!Orig->isNotDuplicable() &&
          "Instruction cannot be duplicated");
   return MF.CloneMachineInstr(Orig);
@@ -278,7 +335,7 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
   return 0;
 }
 
-bool TargetInstrInfoImpl::
+bool TargetInstrInfo::
 canFoldMemoryOperand(const MachineInstr *MI,
                      const SmallVectorImpl<unsigned> &Ops) const {
   return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
@@ -459,9 +516,9 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
 /// isSchedulingBoundary - Test if the given instruction should be
 /// considered a scheduling boundary. This primarily includes labels
 /// and terminators.
-bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
-                                               const MachineBasicBlock *MBB,
-                                               const MachineFunction &MF) const{
+bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+                                           const MachineBasicBlock *MBB,
+                                           const MachineFunction &MF) const {
   // Terminators and labels can't be scheduled around.
   if (MI->isTerminator() || MI->isLabel())
     return true;
@@ -481,12 +538,12 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
 
 // Provide a global flag for disabling the PreRA hazard recognizer that targets
 // may choose to honor.
-bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const {
+bool TargetInstrInfo::usePreRAHazardRecognizer() const {
   return !DisableHazardRecognizer;
 }
 
 // Default implementation of CreateTargetRAHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfoImpl::
+ScheduleHazardRecognizer *TargetInstrInfo::
 CreateTargetHazardRecognizer(const TargetMachine *TM,
                              const ScheduleDAG *DAG) const {
   // Dummy hazard recognizer allows all instructions to issue.
@@ -494,7 +551,7 @@ CreateTargetHazardRecognizer(const TargetMachine *TM,
 }
 
 // Default implementation of CreateTargetMIHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfoImpl::
+ScheduleHazardRecognizer *TargetInstrInfo::
 CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
                                const ScheduleDAG *DAG) const {
   return (ScheduleHazardRecognizer *)
@@ -502,7 +559,7 @@ CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
 }
 
 // Default implementation of CreateTargetPostRAHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfoImpl::
+ScheduleHazardRecognizer *TargetInstrInfo::
 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                    const ScheduleDAG *DAG) const {
   return (ScheduleHazardRecognizer *)
@@ -514,9 +571,9 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
 //===----------------------------------------------------------------------===//
 
 int
-TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData,
-                                       SDNode *DefNode, unsigned DefIdx,
-                                       SDNode *UseNode, unsigned UseIdx) const {
+TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+                                   SDNode *DefNode, unsigned DefIdx,
+                                   SDNode *UseNode, unsigned UseIdx) const {
   if (!ItinData || ItinData->isEmpty())
     return -1;
 
@@ -530,8 +587,8 @@ TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData,
   return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
 }
 
-int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData,
-                                         SDNode *N) const {
+int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+                                     SDNode *N) const {
   if (!ItinData || ItinData->isEmpty())
     return 1;
 
@@ -546,8 +603,8 @@ int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData,
 //===----------------------------------------------------------------------===//
 
 unsigned
-TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData,
-                                    const MachineInstr *MI) const {
+TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
+                                const MachineInstr *MI) const {
   if (!ItinData || ItinData->isEmpty())
     return 1;
 
@@ -573,7 +630,7 @@ unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
   return 1;
 }
 
-unsigned TargetInstrInfoImpl::
+unsigned TargetInstrInfo::
 getInstrLatency(const InstrItineraryData *ItinData,
                 const MachineInstr *MI,
                 unsigned *PredCost) const {
@@ -585,9 +642,9 @@ getInstrLatency(const InstrItineraryData *ItinData,
   return ItinData->getStageLatency(MI->getDesc().getSchedClass());
 }
 
-bool TargetInstrInfoImpl::hasLowDefLatency(const InstrItineraryData *ItinData,
-                                           const MachineInstr *DefMI,
-                                           unsigned DefIdx) const {
+bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
+                                       const MachineInstr *DefMI,
+                                       unsigned DefIdx) const {
   if (!ItinData || ItinData->isEmpty())
     return false;
 
@@ -598,7 +655,7 @@ bool TargetInstrInfoImpl::hasLowDefLatency(const InstrItineraryData *ItinData,
 
 /// Both DefMI and UseMI must be valid.  By default, call directly to the
 /// itinerary. This may be overriden by the target.
-int TargetInstrInfoImpl::
+int TargetInstrInfo::
 getOperandLatency(const InstrItineraryData *ItinData,
                   const MachineInstr *DefMI, unsigned DefIdx,
                   const MachineInstr *UseMI, unsigned UseIdx) const {
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 0fa68c4e1a..358c873fad 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -13,30 +13,30 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalVariable.h"
-#include "llvm/Module.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Module.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 using namespace dwarf;
 
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index be8b582890..9b776d1412 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -11,10 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -246,3 +249,38 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
   }
   return BestRC;
 }
+
+// Compute target-independent register allocator hints to help eliminate copies.
+void
+TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+                                          ArrayRef<MCPhysReg> Order,
+                                          SmallVectorImpl<MCPhysReg> &Hints,
+                                          const MachineFunction &MF,
+                                          const VirtRegMap *VRM) const {
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+  // Hints with HintType != 0 were set by target-dependent code.
+  // Such targets must provide their own implementation of
+  // TRI::getRegAllocationHints to interpret those hint types.
+  assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints");
+
+  // Target-independent hints are either a physical or a virtual register.
+  unsigned Phys = Hint.second;
+  if (VRM && isVirtualRegister(Phys))
+    Phys = VRM->getPhys(Phys);
+
+  // Check that Phys is a valid hint in VirtReg's register class.
+  if (!isPhysicalRegister(Phys))
+    return;
+  if (MRI.isReserved(Phys))
+    return;
+  // Check that Phys is in the allocation order. We shouldn't heed hints
+  // from VirtReg's register class if they aren't in the allocation order. The
+  // target probably has a reason for removing the register.
+  if (std::find(Order.begin(), Order.end(), Phys) == Order.end())
+    return;
+
+  // All clear, tell the register allocator to prefer this register.
+  Hints.push_back(Phys);
+}
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index ca3b0e0b11..f31f67d58c 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -13,12 +13,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index a9058bc7f6..cf7f7f1a1e 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -29,26 +29,26 @@
 
 #define DEBUG_TYPE "twoaddrinstr"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
 #include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
@@ -92,10 +92,6 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
   // virtual registers. e.g. r1 = move v1024.
   DenseMap<unsigned, unsigned> DstRegMap;
 
-  /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen
-  /// during the initial walk of the machine function.
-  SmallVector<MachineInstr*, 16> RegSequences;
-
   bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg,
                             MachineBasicBlock::iterator OldPos);
 
@@ -135,11 +131,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
   typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap;
   bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&);
   void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist);
-
-  /// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part of
-  /// the de-ssa process. This replaces sources of REG_SEQUENCE as sub-register
-  /// references of the register defined by REG_SEQUENCE.
-  bool eliminateRegSequences();
+  void eliminateRegSequence(MachineBasicBlock::iterator&);
 
 public:
   static char ID; // Pass identification, replacement for typeid
@@ -1375,9 +1367,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
         continue;
       }
 
-      // Remember REG_SEQUENCE instructions, we'll deal with them later.
+      // Expand REG_SEQUENCE instructions. This will position mi at the first
+      // expanded instruction.
       if (mi->isRegSequence())
-        RegSequences.push_back(&*mi);
+        eliminateRegSequence(mi);
 
       DistanceMap.insert(std::make_pair(mi, ++Dist));
 
@@ -1444,192 +1437,81 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
     }
   }
 
-  // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve
-  // SSA form. It's now safe to de-SSA.
-  MadeChange |= eliminateRegSequences();
-
   return MadeChange;
 }
 
-static void UpdateRegSequenceSrcs(unsigned SrcReg,
-                                  unsigned DstReg, unsigned SubIdx,
-                                  MachineRegisterInfo *MRI,
-                                  const TargetRegisterInfo &TRI) {
-  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
-         RE = MRI->reg_end(); RI != RE; ) {
-    MachineOperand &MO = RI.getOperand();
-    ++RI;
-    MO.substVirtReg(DstReg, SubIdx, TRI);
-  }
-}
-
-// Find the first def of Reg, assuming they are all in the same basic block.
-static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) {
-  SmallPtrSet<MachineInstr*, 8> Defs;
-  MachineInstr *First = 0;
-  for (MachineRegisterInfo::def_iterator RI = MRI->def_begin(Reg);
-       MachineInstr *MI = RI.skipInstruction(); Defs.insert(MI))
-    First = MI;
-  if (!First)
-    return 0;
-
-  MachineBasicBlock *MBB = First->getParent();
-  MachineBasicBlock::iterator A = First, B = First;
-  bool Moving;
-  do {
-    Moving = false;
-    if (A != MBB->begin()) {
-      Moving = true;
-      --A;
-      if (Defs.erase(A)) First = A;
-    }
-    if (B != MBB->end()) {
-      Defs.erase(B);
-      ++B;
-      Moving = true;
-    }
-  } while (Moving && !Defs.empty());
-  assert(Defs.empty() && "Instructions outside basic block!");
-  return First;
-}
-
-static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
-                                    MachineRegisterInfo *MRI) {
-  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
-         UE = MRI->use_end(); UI != UE; ++UI) {
-    MachineInstr *UseMI = &*UI;
-    if (UseMI != RegSeq && UseMI->isRegSequence())
-      return true;
-  }
-  return false;
-}
-
-/// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
-/// of the de-ssa process. This replaces sources of REG_SEQUENCE as
-/// sub-register references of the register defined by REG_SEQUENCE. e.g.
+/// Eliminate a REG_SEQUENCE instruction as part of the de-ssa process.
 ///
-/// %reg1029<def>, %reg1030<def> = VLD1q16 %reg1024<kill>, ...
-/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6
-/// =>
-/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
-bool TwoAddressInstructionPass::eliminateRegSequences() {
-  if (RegSequences.empty())
-    return false;
-
-  for (unsigned i = 0, e = RegSequences.size(); i != e; ++i) {
-    MachineInstr *MI = RegSequences[i];
-    unsigned DstReg = MI->getOperand(0).getReg();
-    if (MI->getOperand(0).getSubReg() ||
-        TargetRegisterInfo::isPhysicalRegister(DstReg) ||
-        !(MI->getNumOperands() & 1)) {
-      DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
-      llvm_unreachable(0);
-    }
-
-    bool IsImpDef = true;
-    SmallVector<unsigned, 4> RealSrcs;
-    SmallSet<unsigned, 4> Seen;
-    for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
-      // Nothing needs to be inserted for <undef> operands.
-      if (MI->getOperand(i).isUndef()) {
-        MI->getOperand(i).setReg(0);
-        continue;
-      }
-      unsigned SrcReg = MI->getOperand(i).getReg();
-      unsigned SrcSubIdx = MI->getOperand(i).getSubReg();
-      unsigned SubIdx = MI->getOperand(i+1).getImm();
-      // DefMI of NULL means the value does not have a vreg in this block
-      // i.e., its a physical register or a subreg.
-      // In either case we force a copy to be generated.
-      MachineInstr *DefMI = NULL;
-      if (!MI->getOperand(i).getSubReg() &&
-          !TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
-        DefMI = MRI->getUniqueVRegDef(SrcReg);
-      }
+/// The instruction is turned into a sequence of sub-register copies:
+///
+///   %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1
+///
+/// Becomes:
+///
+///   %dst:ssub0<def,undef> = COPY %v1
+///   %dst:ssub1<def> = COPY %v2
+///
+void TwoAddressInstructionPass::
+eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
+  MachineInstr *MI = MBBI;
+  unsigned DstReg = MI->getOperand(0).getReg();
+  if (MI->getOperand(0).getSubReg() ||
+      TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+      !(MI->getNumOperands() & 1)) {
+    DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+    llvm_unreachable(0);
+  }
 
-      if (DefMI && DefMI->isImplicitDef()) {
-        DefMI->eraseFromParent();
-        continue;
-      }
-      IsImpDef = false;
-
-      // Remember COPY sources. These might be candidate for coalescing.
-      if (DefMI && DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
-        RealSrcs.push_back(DefMI->getOperand(1).getReg());
-
-      bool isKill = MI->getOperand(i).isKill();
-      if (!DefMI || !Seen.insert(SrcReg) ||
-          MI->getParent() != DefMI->getParent() ||
-          !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) ||
-          !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg),
-                                         MRI->getRegClass(SrcReg), SubIdx)) {
-        // REG_SEQUENCE cannot have duplicated operands, add a copy.
-        // Also add an copy if the source is live-in the block. We don't want
-        // to end up with a partial-redef of a livein, e.g.
-        // BB0:
-        // reg1051:10<def> =
-        // ...
-        // BB1:
-        // ... = reg1051:10
-        // BB2:
-        // reg1051:9<def> =
-        // LiveIntervalAnalysis won't like it.
-        //
-        // If the REG_SEQUENCE doesn't kill its source, keeping live variables
-        // correctly up to date becomes very difficult. Insert a copy.
-
-        // Defer any kill flag to the last operand using SrcReg. Otherwise, we
-        // might insert a COPY that uses SrcReg after is was killed.
-        if (isKill)
-          for (unsigned j = i + 2; j < e; j += 2)
-            if (MI->getOperand(j).getReg() == SrcReg) {
-              MI->getOperand(j).setIsKill();
-              isKill = false;
-              break;
-            }
+  bool DefEmitted = false;
+  for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+    MachineOperand &UseMO = MI->getOperand(i);
+    unsigned SrcReg = UseMO.getReg();
+    unsigned SubIdx = MI->getOperand(i+1).getImm();
+    // Nothing needs to be inserted for <undef> operands.
+    if (UseMO.isUndef())
+      continue;
 
-        MachineBasicBlock::iterator InsertLoc = MI;
-        MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
-                                MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
-            .addReg(DstReg, RegState::Define, SubIdx)
-            .addReg(SrcReg, getKillRegState(isKill), SrcSubIdx);
-        MI->getOperand(i).setReg(0);
-        if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
-          LV->replaceKillInstruction(SrcReg, MI, CopyMI);
-        DEBUG(dbgs() << "Inserted: " << *CopyMI);
-      }
-    }
+    // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+    // might insert a COPY that uses SrcReg after is was killed.
+    bool isKill = UseMO.isKill();
+    if (isKill)
+      for (unsigned j = i + 2; j < e; j += 2)
+        if (MI->getOperand(j).getReg() == SrcReg) {
+          MI->getOperand(j).setIsKill();
+          UseMO.setIsKill(false);
+          isKill = false;
+          break;
+        }
 
-    for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
-      unsigned SrcReg = MI->getOperand(i).getReg();
-      if (!SrcReg) continue;
-      unsigned SubIdx = MI->getOperand(i+1).getImm();
-      UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI);
+    // Insert the sub-register copy.
+    MachineInstr *CopyMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+                                   TII->get(TargetOpcode::COPY))
+      .addReg(DstReg, RegState::Define, SubIdx)
+      .addOperand(UseMO);
+
+    // The first def needs an <undef> flag because there is no live register
+    // before it.
+    if (!DefEmitted) {
+      CopyMI->getOperand(0).setIsUndef(true);
+      // Return an iterator pointing to the first inserted instr.
+      MBBI = CopyMI;
     }
+    DefEmitted = true;
 
-    // Set <def,undef> flags on the first DstReg def in the basic block.
-    // It marks the beginning of the live range. All the other defs are
-    // read-modify-write.
-    if (MachineInstr *Def = findFirstDef(DstReg, MRI)) {
-      for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
-        MachineOperand &MO = Def->getOperand(i);
-        if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
-          MO.setIsUndef();
-      }
-      DEBUG(dbgs() << "First def: " << *Def);
-    }
+    // Update LiveVariables' kill info.
+    if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
+      LV->replaceKillInstruction(SrcReg, MI, CopyMI);
 
-    if (IsImpDef) {
-      DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
-      MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
-      for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
-        MI->RemoveOperand(j);
-    } else {
-      DEBUG(dbgs() << "Eliminated: " << *MI);
-      MI->eraseFromParent();
-    }
+    DEBUG(dbgs() << "Inserted: " << *CopyMI);
   }
 
-  RegSequences.clear();
-  return true;
+  if (!DefEmitted) {
+    DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
+    MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+    for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+      MI->RemoveOperand(j);
+  } else {
+    DEBUG(dbgs() << "Eliminated: " << *MI);
+    MI->eraseFromParent();
+  }
 }
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 52693f03e8..0f814b1f8d 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -21,22 +21,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Constant.h"
-#include "llvm/Instructions.h"
-#include "llvm/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Constant.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index bb93bdc0bc..cd012d2974 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -17,8 +17,10 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "regalloc"
-#include "VirtRegMap.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "LiveDebugVariables.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -26,15 +28,13 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -77,15 +77,22 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
   return SS;
 }
 
-unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) {
-  std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg);
-  unsigned physReg = Hint.second;
-  if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
-    physReg = getPhys(physReg);
-  if (Hint.first == 0)
-    return (TargetRegisterInfo::isPhysicalRegister(physReg))
-      ? physReg : 0;
-  return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF);
+bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) {
+  unsigned Hint = MRI->getSimpleHint(VirtReg);
+  if (!Hint)
+    return 0;
+  if (TargetRegisterInfo::isVirtualRegister(Hint))
+    Hint = getPhys(Hint);
+  return getPhys(VirtReg) == Hint;
+}
+
+bool VirtRegMap::hasKnownPreference(unsigned VirtReg) {
+  std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
+  if (TargetRegisterInfo::isPhysicalRegister(Hint.second))
+    return true;
+  if (TargetRegisterInfo::isVirtualRegister(Hint.second))
+    return hasPhys(Hint.second);
+  return false;
 }
 
 int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index 5fa6534067..91ed2d22ab 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -35,7 +35,7 @@ void DWARFContext::dump(raw_ostream &OS) {
     set.dump(OS);
 
   uint8_t savedAddressByteSize = 0;
-  OS << "\n.debug_lines contents:\n";
+  OS << "\n.debug_line contents:\n";
   for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) {
     DWARFCompileUnit *cu = getCompileUnitAtIndex(i);
     savedAddressByteSize = cu->getAddressByteSize();
diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h
index ff161e2aad..24613594de 100644
--- a/lib/DebugInfo/DWARFContext.h
+++ b/lib/DebugInfo/DWARFContext.h
@@ -14,9 +14,9 @@
 #include "DWARFDebugAranges.h"
 #include "DWARFDebugLine.h"
 #include "DWARFDebugRangeList.h"
-#include "llvm/DebugInfo/DIContext.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/DIContext.h"
 
 namespace llvm {
 
diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h
index 911d1d68b2..cd2124c6e3 100644
--- a/lib/ExecutionEngine/EventListenerCommon.h
+++ b/lib/ExecutionEngine/EventListenerCommon.h
@@ -14,11 +14,11 @@
 #ifndef EVENT_LISTENER_COMMON_H
 #define EVENT_LISTENER_COMMON_H
 
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/Metadata.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/ValueHandle.h"
 
 namespace llvm {
 
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 05987f2b74..21e0ad3fcd 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -14,22 +14,21 @@
 
 #define DEBUG_TYPE "jit"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
-
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Module.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
 #include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cmath>
 #include <cstring>
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 1e790e781d..f4e8246476 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -13,8 +13,8 @@
 
 #define DEBUG_TYPE "jit"
 #include "llvm-c/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cstring>
 
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 5202b09165..63011c9c88 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -13,16 +13,16 @@
 
 #define DEBUG_TYPE "interpreter"
 #include "Interpreter.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
-#include "llvm/CodeGen/IntrinsicLowering.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include <algorithm>
 #include <cmath>
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index e3b90fdf78..2225f206b0 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -20,19 +20,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "Interpreter.h"
+#include "llvm/Config/config.h"     // Detect libffi
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
-#include "llvm/Config/config.h"     // Detect libffi
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/DynamicLibrary.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Mutex.h"
+#include <cmath>
 #include <csignal>
 #include <cstdio>
-#include <map>
-#include <cmath>
 #include <cstring>
+#include <map>
 
 #ifdef HAVE_FFI_CALL
 #ifdef HAVE_FFI_H
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 72c42c15db..0c03760494 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -14,14 +14,14 @@
 #ifndef LLI_INTERPRETER_H
 #define LLI_INTERPRETER_H
 
-#include "llvm/Function.h"
+#include "llvm/DataLayout.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Function.h"
+#include "llvm/InstVisitor.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/raw_ostream.h"
 namespace llvm {
 
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 1ad338203a..2a7dfa00b6 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -13,26 +13,26 @@
 //===----------------------------------------------------------------------===//
 
 #include "JIT.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
 #include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/Config/config.h"
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
 #include "llvm/Support/Dwarf.h"
+#include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MutexGuard.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Config/config.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 19c197903a..29dac39f50 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -12,21 +12,21 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "JIT.h"
 #include "JITDwarfEmitter.h"
-#include "llvm/Function.h"
+#include "JIT.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/DataLayout.h"
 #include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
index 9cdbeac86a..98ac340491 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -15,9 +15,13 @@
 #ifndef LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
 #define LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
 
+#include "llvm/Support/DataTypes.h"
+#include <vector>
+
 namespace llvm {
 
 class Function;
+class JIT;
 class JITCodeEmitter;
 class MachineFunction;
 class MachineModuleInfo;
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index ecafda7286..c7a5b95147 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -15,39 +15,39 @@
 #define DEBUG_TYPE "jit"
 #include "JIT.h"
 #include "JITDwarfEmitter.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineCodeInfo.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetJITInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Module.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Disassembler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Memory.h"
 #include "llvm/Support/MutexGuard.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Disassembler.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/ValueMap.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 #include <algorithm>
 #ifndef NDEBUG
 #include <iomanip>
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index bd0519e9c4..0ca3d6c71a 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -16,20 +16,20 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Memory.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Config/config.h"
-#include <vector>
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <climits>
 #include <cstring>
+#include <vector>
 
 #if defined(__linux__)
 #if defined(HAVE_SYS_STAT_H)
diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
index 2911a50772..088635a0e9 100644
--- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -1,3 +1,4 @@
 add_llvm_library(LLVMMCJIT
   MCJIT.cpp
+  SectionMemoryManager.cpp
   )
diff --git a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
index 90f4d2f75e..900460bf1c 100644
--- a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
+++ b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
@@ -19,4 +19,4 @@
 type = Library
 name = MCJIT
 parent = ExecutionEngine
-required_libraries = Core ExecutionEngine RuntimeDyld Support Target
+required_libraries = Core ExecutionEngine RuntimeDyld Support Target JIT
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index d72e56378b..e1ac194975 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -8,20 +8,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCJIT.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/ExecutionEngine/JITMemoryManager.h"
 #include "llvm/ExecutionEngine/MCJIT.h"
 #include "llvm/ExecutionEngine/ObjectBuffer.h"
 #include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/MutexGuard.h"
-#include "llvm/DataLayout.h"
 
 using namespace llvm;
 
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index 571080d2bd..283a8e5281 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -10,10 +10,10 @@
 #ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H
 #define LLVM_LIB_EXECUTIONENGINE_MCJIT_H
 
-#include "llvm/PassManager.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/PassManager.h"
 
 namespace llvm {
 
diff --git a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
new file mode 100644
index 0000000000..fa35acd389
--- /dev/null
+++ b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
@@ -0,0 +1,226 @@
+//===- SectionMemoryManager.cpp - Memory manager for MCJIT/RtDyld *- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the section-based memory manager used by the MCJIT
+// execution engine and RuntimeDyld
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/MathExtras.h"
+
+#ifdef __linux__
+  // These includes used by SectionMemoryManager::getPointerToNamedFunction()
+  // for Glibc trickery. See comments in this function for more information.
+  #ifdef HAVE_SYS_STAT_H
+    #include <sys/stat.h>
+  #endif
+  #include <fcntl.h>
+  #include <unistd.h>
+#endif
+
+namespace llvm {
+
+uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size,
+                                                    unsigned Alignment,
+                                                    unsigned SectionID,
+                                                    bool IsReadOnly) {
+  if (IsReadOnly)
+    return allocateSection(RODataMem, Size, Alignment);
+  return allocateSection(RWDataMem, Size, Alignment);
+}
+
+uint8_t *SectionMemoryManager::allocateCodeSection(uintptr_t Size,
+                                                   unsigned Alignment,
+                                                   unsigned SectionID) {
+  return allocateSection(CodeMem, Size, Alignment);
+}
+
+uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup,
+                                               uintptr_t Size,
+                                               unsigned Alignment) {
+  if (!Alignment)
+    Alignment = 16;
+
+  assert(!(Alignment & (Alignment - 1)) && "Alignment must be a power of two.");
+
+  uintptr_t RequiredSize = Alignment * ((Size + Alignment - 1)/Alignment + 1);
+  uintptr_t Addr = 0;
+
+  // Look in the list of free memory regions and use a block there if one
+  // is available.
+  for (int i = 0, e = MemGroup.FreeMem.size(); i != e; ++i) {
+    sys::MemoryBlock &MB = MemGroup.FreeMem[i];
+    if (MB.size() >= RequiredSize) {
+      Addr = (uintptr_t)MB.base();
+      uintptr_t EndOfBlock = Addr + MB.size();
+      // Align the address.
+      Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
+      // Store cutted free memory block.
+      MemGroup.FreeMem[i] = sys::MemoryBlock((void*)(Addr + Size),
+                                             EndOfBlock - Addr - Size);
+      return (uint8_t*)Addr;
+    }
+  }
+
+  // No pre-allocated free block was large enough. Allocate a new memory region.
+  // Note that all sections get allocated as read-write.  The permissions will
+  // be updated later based on memory group.
+  //
+  // FIXME: It would be useful to define a default allocation size (or add
+  // it as a constructor parameter) to minimize the number of allocations.
+  //
+  // FIXME: Initialize the Near member for each memory group to avoid
+  // interleaving.
+  error_code ec;
+  sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(RequiredSize,
+                                                          &MemGroup.Near,
+                                                          sys::Memory::MF_READ |
+                                                            sys::Memory::MF_WRITE,
+                                                          ec);
+  if (ec) {
+    // FIXME: Add error propogation to the interface.
+    return NULL;
+  }
+
+  // Save this address as the basis for our next request
+  MemGroup.Near = MB;
+
+  MemGroup.AllocatedMem.push_back(MB);
+  Addr = (uintptr_t)MB.base();
+  uintptr_t EndOfBlock = Addr + MB.size();
+
+  // Align the address.
+  Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
+
+  // The allocateMappedMemory may allocate much more memory than we need. In
+  // this case, we store the unused memory as a free memory block.
+  unsigned FreeSize = EndOfBlock-Addr-Size;
+  if (FreeSize > 16)
+    MemGroup.FreeMem.push_back(sys::MemoryBlock((void*)(Addr + Size), FreeSize));
+
+  // Return aligned address
+  return (uint8_t*)Addr;
+}
+
+bool SectionMemoryManager::applyPermissions(std::string *ErrMsg)
+{
+  // FIXME: Should in-progress permissions be reverted if an error occurs?
+  error_code ec;
+
+  // Make code memory executable.
+  ec = applyMemoryGroupPermissions(CodeMem,
+                                   sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+  if (ec) {
+    if (ErrMsg) {
+      *ErrMsg = ec.message();
+    }
+    return true;
+  }
+
+  // Make read-only data memory read-only.
+  ec = applyMemoryGroupPermissions(RODataMem,
+                                   sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+  if (ec) {
+    if (ErrMsg) {
+      *ErrMsg = ec.message();
+    }
+    return true;
+  }
+
+  // Read-write data memory already has the correct permissions
+
+  return false;
+}
+
+error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
+                                                             unsigned Permissions) {
+
+  for (int i = 0, e = MemGroup.AllocatedMem.size(); i != e; ++i) {
+      error_code ec;
+      ec = sys::Memory::protectMappedMemory(MemGroup.AllocatedMem[i],
+                                            Permissions);
+      if (ec) {
+        return ec;
+      }
+  }
+
+  return error_code::success();
+}
+
+void SectionMemoryManager::invalidateInstructionCache() {
+  for (int i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i)
+    sys::Memory::InvalidateInstructionCache(CodeMem.AllocatedMem[i].base(),
+                                            CodeMem.AllocatedMem[i].size());
+}
+
+static int jit_noop() {
+  return 0;
+}
+
+void *SectionMemoryManager::getPointerToNamedFunction(const std::string &Name,
+                                                       bool AbortOnFailure) {
+#if defined(__linux__)
+  //===--------------------------------------------------------------------===//
+  // Function stubs that are invoked instead of certain library calls
+  //
+  // Force the following functions to be linked in to anything that uses the
+  // JIT. This is a hack designed to work around the all-too-clever Glibc
+  // strategy of making these functions work differently when inlined vs. when
+  // not inlined, and hiding their real definitions in a separate archive file
+  // that the dynamic linker can't see. For more info, search for
+  // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+  if (Name == "stat") return (void*)(intptr_t)&stat;
+  if (Name == "fstat") return (void*)(intptr_t)&fstat;
+  if (Name == "lstat") return (void*)(intptr_t)&lstat;
+  if (Name == "stat64") return (void*)(intptr_t)&stat64;
+  if (Name == "fstat64") return (void*)(intptr_t)&fstat64;
+  if (Name == "lstat64") return (void*)(intptr_t)&lstat64;
+  if (Name == "atexit") return (void*)(intptr_t)&atexit;
+  if (Name == "mknod") return (void*)(intptr_t)&mknod;
+#endif // __linux__
+
+  // We should not invoke parent's ctors/dtors from generated main()!
+  // On Mingw and Cygwin, the symbol __main is resolved to
+  // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
+  // (and register wrong callee's dtors with atexit(3)).
+  // We expect ExecutionEngine::runStaticConstructorsDestructors()
+  // is called before ExecutionEngine::runFunctionAsMain() is called.
+  if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+  const char *NameStr = Name.c_str();
+  void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+  if (Ptr) return Ptr;
+
+  // If it wasn't found and if it starts with an underscore ('_') character,
+  // try again without the underscore.
+  if (NameStr[0] == '_') {
+    Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+    if (Ptr) return Ptr;
+  }
+
+  if (AbortOnFailure)
+    report_fatal_error("Program used external function '" + Name +
+                      "' which could not be resolved!");
+  return 0;
+}
+
+SectionMemoryManager::~SectionMemoryManager() {
+  for (unsigned i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i)
+    sys::Memory::releaseMappedMemory(CodeMem.AllocatedMem[i]);
+  for (unsigned i = 0, e = RWDataMem.AllocatedMem.size(); i != e; ++i)
+    sys::Memory::releaseMappedMemory(RWDataMem.AllocatedMem[i]);
+  for (unsigned i = 0, e = RODataMem.AllocatedMem.size(); i != e; ++i)
+    sys::Memory::releaseMappedMemory(RODataMem.AllocatedMem[i]);
+}
+
+} // namespace llvm
+
diff --git a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
index 50cd0724ea..205ac2342c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
@@ -9,10 +9,10 @@
 
 #include "JITRegistrar.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/Support/MutexGuard.h"
-#include "llvm/Support/Mutex.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/MutexGuard.h"
 
 using namespace llvm;
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
index 097df35a5c..28b4f0f84a 100644
--- a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
+++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
@@ -14,9 +14,9 @@
 #ifndef LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
 #define LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H
 
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/ExecutionEngine/ObjectImage.h"
 #include "llvm/ExecutionEngine/ObjectBuffer.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/Object/ObjectFile.h"
 
 namespace llvm {
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index e6e1bdc8b1..f348a33b38 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -12,12 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "dyld"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "ObjectImageCommon.h"
-#include "RuntimeDyldImpl.h"
 #include "RuntimeDyldELF.h"
+#include "RuntimeDyldImpl.h"
 #include "RuntimeDyldMachO.h"
-#include "llvm/Support/Path.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
 
 using namespace llvm;
 using namespace llvm::object;
@@ -346,7 +347,7 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
     uint32_t *StubAddr = (uint32_t*)Addr;
     *StubAddr = 0xe51ff004; // ldr pc,<label>
     return (uint8_t*)++StubAddr;
-  } else if (Arch == Triple::mipsel) {
+  } else if (Arch == Triple::mipsel || Arch == Triple::mips) {
     uint32_t *StubAddr = (uint32_t*)Addr;
     // 0:   3c190000        lui     t9,%hi(addr).
     // 4:   27390000        addiu   t9,t9,%lo(addr).
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 74bb46dfcd..f32e3871b8 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -15,16 +15,16 @@
 #include "RuntimeDyldELF.h"
 #include "JITRegistrar.h"
 #include "ObjectImageCommon.h"
+#include "llvm/ADT/IntervalMap.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/IntervalMap.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/ExecutionEngine/ObjectImage.h"
-#include "llvm/ExecutionEngine/ObjectBuffer.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/ObjectBuffer.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
 #include "llvm/Object/ELF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/ELF.h"
 using namespace llvm;
 using namespace llvm::object;
 
@@ -677,7 +677,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
                         RelType, 0);
       Section.StubOffset += getMaxStubSize();
     }
-  } else if (Arch == Triple::mipsel && RelType == ELF::R_MIPS_26) {
+  } else if ((Arch == Triple::mipsel || Arch == Triple::mips) &&
+             RelType == ELF::R_MIPS_26) {
     // This is an Mips branch relocation, need to use a stub function.
     DEBUG(dbgs() << "\t\tThis is a Mips branch relocation.");
     SectionEntry &Section = Sections[Rel.SectionID];
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 829fd6c4c9..f100994577 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -14,12 +14,12 @@
 #ifndef LLVM_RUNTIME_DYLD_IMPL_H
 #define LLVM_RUNTIME_DYLD_IMPL_H
 
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/ExecutionEngine/ObjectImage.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -168,7 +168,7 @@ protected:
   inline unsigned getMaxStubSize() {
     if (Arch == Triple::arm || Arch == Triple::thumb)
       return 8; // 32-bit instruction and 32-bit address
-    else if (Arch == Triple::mipsel)
+    else if (Arch == Triple::mipsel || Arch == Triple::mips)
       return 16;
     else if (Arch == Triple::ppc64)
       return 44;
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 987c0c3afc..d2310b57e0 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "dyld"
+#include "RuntimeDyldMachO.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/STLExtras.h"
-#include "RuntimeDyldMachO.h"
+#include "llvm/ADT/StringRef.h"
 using namespace llvm;
 using namespace llvm::object;
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index fe3539dff6..62d8487078 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -14,10 +14,10 @@
 #ifndef LLVM_RUNTIME_DYLD_MACHO_H
 #define LLVM_RUNTIME_DYLD_MACHO_H
 
+#include "RuntimeDyldImpl.h"
 #include "llvm/ADT/IndexedMap.h"
 #include "llvm/Object/MachOObject.h"
 #include "llvm/Support/Format.h"
-#include "RuntimeDyldImpl.h"
 
 using namespace llvm;
 using namespace llvm::object;
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index 8b6104fdca..4e52a98c33 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -15,13 +15,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/Module.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
 
 using namespace llvm;
 
@@ -32,8 +32,18 @@ TargetMachine *EngineBuilder::selectTarget() {
   // must use the host architecture.
   if (UseMCJIT && WhichEngine != EngineKind::Interpreter && M)
     TT.setTriple(M->getTargetTriple());
-  else
+  else {
     TT.setTriple(LLVM_HOSTTRIPLE);
+#if defined(__APPLE__)
+#if defined(__LP64__)
+    if (TT.isArch32Bit())
+      TT = TT.get64BitArchVariant();
+#else
+    if (TT.isArch64Bit())
+      TT = TT.get32BitArchVariant();
+#endif
+#endif // APPLE
+  }
   return selectTarget(TT, MArch, MCPU, MAttrs);
 }
 
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
index f7f814b9cb..02127820f6 100644
--- a/lib/LLVMBuild.txt
+++ b/lib/LLVMBuild.txt
@@ -16,7 +16,7 @@
 ;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker MC Object Support TableGen Target Transforms VMCore Wrap
+subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker MC Object Option Support TableGen Target Transforms VMCore Wrap
 
 [component_0]
 type = Group
diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp
index c5656a54c9..9d5e5b5c80 100644
--- a/lib/Linker/LinkArchives.cpp
+++ b/lib/Linker/LinkArchives.cpp
@@ -13,10 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Linker.h"
-#include "llvm/Module.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/Bitcode/Archive.h"
-
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h" // @LOCALMOD
 
 #include <memory>
diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp
index 52a0d175a5..124e5e4e4d 100644
--- a/lib/Linker/LinkItems.cpp
+++ b/lib/Linker/LinkItems.cpp
@@ -13,11 +13,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Linker.h"
-#include "llvm/Module.h"
 #include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Support/Path.h"
+#include "llvm/Module.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/system_error.h"
 using namespace llvm;
 
@@ -51,20 +51,6 @@ Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) {
     }
   }
 
-  // At this point we have processed all the link items provided to us. Since
-  // we have an aggregated module at this point, the dependent libraries in
-  // that module should also be aggregated with duplicates eliminated. This is
-  // now the time to process the dependent libraries to resolve any remaining
-  // symbols.
-  bool is_native;
-  for (Module::lib_iterator I = Composite->lib_begin(),
-         E = Composite->lib_end(); I != E; ++I) {
-    if(LinkInLibrary(*I, is_native))
-      return true;
-    if (is_native)
-      NativeItems.push_back(std::make_pair(*I, true));
-  }
-
   return false;
 }
 
@@ -128,17 +114,6 @@ bool Linker::LinkInLibraries(const std::vector<std::string> &Libraries) {
     if (LinkInLibrary(Libraries[i], is_native))
       return true;
 
-  // At this point we have processed all the libraries provided to us. Since
-  // we have an aggregated module at this point, the dependent libraries in
-  // that module should also be aggregated with duplicates eliminated. This is
-  // now the time to process the dependent libraries to resolve any remaining
-  // symbols.
-  const Module::LibraryListType& DepLibs = Composite->getLibraries();
-  for (Module::LibraryListType::const_iterator I = DepLibs.begin(),
-         E = DepLibs.end(); I != E; ++I)
-    if (LinkInLibrary(*I, is_native))
-      return true;
-
   return false;
 }
 
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index b3426fb19f..6554d9c429 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -12,21 +12,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Linker.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/TypeFinder.h"
+#include "llvm-c/Linker.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm-c/Linker.h"
+#include "llvm/TypeFinder.h"
 #include <cctype>
 using namespace llvm;
 
@@ -1207,19 +1207,6 @@ bool ModuleLinker::run() {
                                SrcM->getModuleInlineAsm());
   }
 
-  // Update the destination module's dependent libraries list with the libraries
-  // from the source module. There's no opportunity for duplicates here as the
-  // Module ensures that duplicate insertions are discarded.
-  for (Module::lib_iterator SI = SrcM->lib_begin(), SE = SrcM->lib_end();
-       SI != SE; ++SI)
-    DstM->addLibrary(*SI);
-  
-  // If the source library's module id is in the dependent library list of the
-  // destination library, remove it since that module is now linked in.
-  StringRef ModuleId = SrcM->getModuleIdentifier();
-  if (!ModuleId.empty())
-    DstM->removeLibrary(sys::path::stem(ModuleId));
-  
   // Loop over all of the linked values to compute type mappings.
   computeTypeMapping();
 
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
index 7c6cf4f3dd..ffe79d29c1 100644
--- a/lib/Linker/Linker.cpp
+++ b/lib/Linker/Linker.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Linker.h"
-#include "llvm/Module.h"
 #include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Support/Path.h"
+#include "llvm/Module.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
 using namespace llvm;
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index a94d51bb74..b06784f789 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -11,17 +11,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCELFObjectWriter.h"
 #include "MCELF.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCELFSymbolFlags.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixupKindInfo.h"
@@ -29,9 +29,8 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ELF.h"
-
+#include "llvm/Support/ErrorHandling.h"
 #include <vector>
 using namespace llvm;
 
@@ -204,7 +203,7 @@ class ELFObjectWriter : public MCObjectWriter {
     void String8(MCDataFragment &F, uint8_t Value) {
       char buf[1];
       buf[0] = Value;
-      F.getContents() += StringRef(buf, 1);
+      F.getContents().append(&buf[0], &buf[1]);
     }
 
     void String16(MCDataFragment &F, uint16_t Value) {
@@ -213,7 +212,7 @@ class ELFObjectWriter : public MCObjectWriter {
         StringLE16(buf, Value);
       else
         StringBE16(buf, Value);
-      F.getContents() += StringRef(buf, 2);
+      F.getContents().append(&buf[0], &buf[2]);
     }
 
     void String32(MCDataFragment &F, uint32_t Value) {
@@ -222,7 +221,7 @@ class ELFObjectWriter : public MCObjectWriter {
         StringLE32(buf, Value);
       else
         StringBE32(buf, Value);
-      F.getContents() += StringRef(buf, 4);
+      F.getContents().append(&buf[0], &buf[4]);
     }
 
     void String64(MCDataFragment &F, uint64_t Value) {
@@ -231,7 +230,7 @@ class ELFObjectWriter : public MCObjectWriter {
         StringLE64(buf, Value);
       else
         StringBE64(buf, Value);
-      F.getContents() += StringRef(buf, 8);
+      F.getContents().append(&buf[0], &buf[8]);
     }
 
     void WriteHeader(uint64_t SectionDataSize,
@@ -1187,7 +1186,7 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
   // The first entry of a string table holds a null character so skip
   // section 0.
   uint64_t Index = 1;
-  F->getContents() += '\x00';
+  F->getContents().push_back('\x00');
 
   for (unsigned int I = 0, E = Sections.size(); I != E; ++I) {
     const MCSectionELF &Section = *Sections[I];
@@ -1205,8 +1204,8 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
     SectionStringTableIndex[&Section] = Index;
 
     Index += Name.size() + 1;
-    F->getContents() += Name;
-    F->getContents() += '\x00';
+    F->getContents().append(Name.begin(), Name.end());
+    F->getContents().push_back('\x00');
   }
 }
 
@@ -1381,7 +1380,7 @@ void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm,
          ++i) {
       const MCFragment &F = *i;
       assert(F.getKind() == MCFragment::FT_Data);
-      WriteBytes(cast<MCDataFragment>(F).getContents().str());
+      WriteBytes(cast<MCDataFragment>(F).getContents());
     }
   } else {
     Asm.writeSectionData(&SD, Layout);
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 3309bb3e64..bca816e963 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -8,6 +8,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
@@ -20,15 +25,10 @@
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Twine.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/PathV2.h"
 #include <cctype>
 using namespace llvm;
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 477bd17c0d..23ec0bb12d 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -8,21 +8,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCLabel.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCLabel.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
 using namespace llvm;
 
 typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
@@ -31,12 +31,14 @@ typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
 
 
 MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri,
-                     const MCObjectFileInfo *mofi, const SourceMgr *mgr) :
+                     const MCObjectFileInfo *mofi, const SourceMgr *mgr,
+                     bool DoAutoInitializationFinalization ) :
   SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi),
   Allocator(), Symbols(Allocator), UsedNames(Allocator),
   NextUniqueID(0),
   CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0),
-  AllowTemporaryLabels(true) {
+  AllowTemporaryLabels(true),
+  AutoInitializationFinalization(DoAutoInitializationFinalization) {
   MachOUniquingMap = 0;
   ELFUniquingMap = 0;
   COFFUniquingMap = 0;
@@ -45,22 +47,54 @@ MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri,
   SecureLog = 0;
   SecureLogUsed = false;
 
-  DwarfLocSeen = false;
-  GenDwarfForAssembly = false;
-  GenDwarfFileNumber = 0;
+  if (AutoInitializationFinalization)
+    doInitialization();
 }
 
 MCContext::~MCContext() {
+
+  if (AutoInitializationFinalization)
+    doFinalization();
+
   // NOTE: The symbols are all allocated out of a bump pointer allocator,
   // we don't need to free them here.
+  
+  // If the stream for the .secure_log_unique directive was created free it.
+  delete (raw_ostream*)SecureLog;
+}
+
+//===----------------------------------------------------------------------===//
+// Module Lifetime Management
+//===----------------------------------------------------------------------===//
+
+void MCContext::doInitialization() {
+  NextUniqueID = 0;
+  AllowTemporaryLabels = true;
+  DwarfLocSeen = false;
+  GenDwarfForAssembly = false;
+  GenDwarfFileNumber = 0;
+}
+
+void MCContext::doFinalization() {
+  UsedNames.clear();
+  Symbols.clear();
+  Allocator.Reset();
+  Instances.clear();
+  MCDwarfFiles.clear();
+  MCDwarfDirs.clear();
+  MCGenDwarfLabelEntries.clear();
+  DwarfDebugFlags = StringRef();
+  MCLineSections.clear();
+  MCLineSectionOrder.clear();
+  CurrentDwarfLoc = MCDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0);
 
   // If we have the MachO uniquing map, free it.
   delete (MachOUniqueMapTy*)MachOUniquingMap;
   delete (ELFUniqueMapTy*)ELFUniquingMap;
   delete (COFFUniqueMapTy*)COFFUniquingMap;
-
-  // If the stream for the .secure_log_unique directive was created free it.
-  delete (raw_ostream*)SecureLog;
+  MachOUniquingMap = 0;
+  ELFUniquingMap = 0;
+  COFFUniquingMap = 0;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index 490ca75bc8..ac583ac127 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -9,7 +9,6 @@
 
 #include "Disassembler.h"
 #include "llvm-c/Disassembler.h"
-
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler.h"
@@ -18,9 +17,9 @@
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/ErrorHandling.h"
 
 namespace llvm {
 class Target;
@@ -34,29 +33,29 @@ using namespace llvm;
 // functions can all be passed as NULL.  If successful, this returns a
 // disassembler context.  If not, it returns NULL.
 //
-LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
-                                      int TagType, LLVMOpInfoCallback GetOpInfo,
-                                      LLVMSymbolLookupCallback SymbolLookUp) {
+LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
+                                         void *DisInfo, int TagType,
+                                         LLVMOpInfoCallback GetOpInfo,
+                                         LLVMSymbolLookupCallback SymbolLookUp){
   // Get the target.
   std::string Error;
-  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+  const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
   assert(TheTarget && "Unable to create target!");
 
   // Get the assembler info needed to setup the MCContext.
-  const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(TripleName);
+  const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple);
   assert(MAI && "Unable to create target asm info!");
 
   const MCInstrInfo *MII = TheTarget->createMCInstrInfo();
   assert(MII && "Unable to create target instruction info!");
 
-  const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TripleName);
+  const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple);
   assert(MRI && "Unable to create target register info!");
 
   // Package up features to be passed to target/subtarget
   std::string FeaturesStr;
-  std::string CPU;
 
-  const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(TripleName, CPU,
+  const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU,
                                                                 FeaturesStr);
   assert(STI && "Unable to create subtarget info!");
 
@@ -75,7 +74,7 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
                                                      *MAI, *MII, *MRI, *STI);
   assert(IP && "Unable to create instruction printer!");
 
-  LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType,
+  LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType,
                                                 GetOpInfo, SymbolLookUp,
                                                 TheTarget, MAI, MRI,
                                                 STI, MII, Ctx, DisAsm, IP);
@@ -84,6 +83,13 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
   return DC;
 }
 
+LLVMDisasmContextRef LLVMCreateDisasm(const char *Triple, void *DisInfo,
+                                      int TagType, LLVMOpInfoCallback GetOpInfo,
+                                      LLVMSymbolLookupCallback SymbolLookUp) {
+  return LLVMCreateDisasmCPU(Triple, "", DisInfo, TagType, GetOpInfo,
+                             SymbolLookUp);
+}
+
 //
 // LLVMDisasmDispose() disposes of the disassembler specified by the context.
 //
@@ -183,5 +189,11 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){
       IP->setUseMarkup(1);
       Options &= ~LLVMDisassembler_Option_UseMarkup;
   }
+  if (Options & LLVMDisassembler_Option_PrintImmHex){
+      LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+      MCInstPrinter *IP = DC->getIP();
+      IP->setPrintImmHex(1);
+      Options &= ~LLVMDisassembler_Option_PrintImmHex;
+  }
   return (Options == 0);
 }
diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h
index 322abd5d63..28cf04b95d 100644
--- a/lib/MC/MCDisassembler/Disassembler.h
+++ b/lib/MC/MCDisassembler/Disassembler.h
@@ -18,10 +18,10 @@
 #define LLVM_MC_DISASSEMBLER_H
 
 #include "llvm-c/Disassembler.h"
-#include <string>
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/raw_ostream.h"
+#include <string>
 
 namespace llvm {
 class MCContext;
diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp
index eed7a771b9..e667920791 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.cpp
+++ b/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -23,12 +23,12 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstPrinter.h"
 #include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCParser/AsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCTargetAsmLexer.h"
 #include "llvm/MC/MCTargetAsmParser.h"
 #include "llvm/Support/MemoryBuffer.h"
diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h
index 6f71908d2b..942b9067e6 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.h
+++ b/lib/MC/MCDisassembler/EDDisassembler.h
@@ -17,12 +17,10 @@
 #define LLVM_EDDISASSEMBLER_H
 
 #include "EDInfo.h"
-
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Mutex.h"
-
+#include "llvm/Support/raw_ostream.h"
 #include <map>
 #include <set>
 #include <string>
diff --git a/lib/MC/MCDisassembler/EDInst.cpp b/lib/MC/MCDisassembler/EDInst.cpp
index 6057e169e3..4c4fdd2568 100644
--- a/lib/MC/MCDisassembler/EDInst.cpp
+++ b/lib/MC/MCDisassembler/EDInst.cpp
@@ -17,7 +17,6 @@
 #include "EDDisassembler.h"
 #include "EDOperand.h"
 #include "EDToken.h"
-
 #include "llvm/MC/EDInstInfo.h"
 #include "llvm/MC/MCInst.h"
 
diff --git a/lib/MC/MCDisassembler/EDInst.h b/lib/MC/MCDisassembler/EDInst.h
index 6b78dc826c..cc0b562130 100644
--- a/lib/MC/MCDisassembler/EDInst.h
+++ b/lib/MC/MCDisassembler/EDInst.h
@@ -16,8 +16,8 @@
 #ifndef LLVM_EDINST_H
 #define LLVM_EDINST_H
 
-#include "llvm/Support/DataTypes.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
 #include <string>
 #include <vector>
 
diff --git a/lib/MC/MCDisassembler/EDToken.cpp b/lib/MC/MCDisassembler/EDToken.cpp
index 5f6c9df481..a7fb1eb3c1 100644
--- a/lib/MC/MCDisassembler/EDToken.cpp
+++ b/lib/MC/MCDisassembler/EDToken.cpp
@@ -15,9 +15,9 @@
 
 #include "EDToken.h"
 #include "EDDisassembler.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
 EDToken::EDToken(StringRef str,
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 084fadec72..9fd608bd6f 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -8,24 +8,24 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCDwarf.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCExpr.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/Config/config.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 // Given a special op, return the address skip amount (in units of
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 1a2602c854..b24deb9c51 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -11,23 +11,24 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCELFStreamer.h"
+
 #include "MCELF.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCELFSymbolFlags.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCObjectStreamer.h"
 #include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -35,117 +36,36 @@
 
 using namespace llvm;
 
-namespace {
-class MCELFStreamer : public MCObjectStreamer {
-public:
-  MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
-                  raw_ostream &OS, MCCodeEmitter *Emitter)
-    : MCObjectStreamer(Context, TAB, OS, Emitter) {}
-
-  MCELFStreamer(MCContext &Context, MCAsmBackend &TAB,
-                raw_ostream &OS, MCCodeEmitter *Emitter,
-                MCAssembler *Assembler)
-    : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {}
-
-
-  ~MCELFStreamer() {}
-
-  /// @name MCStreamer Interface
-  /// @{
-
-  virtual void InitSections();
-  virtual void ChangeSection(const MCSection *Section);
-  virtual void EmitLabel(MCSymbol *Symbol);
-  virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
-  virtual void EmitThumbFunc(MCSymbol *Func);
-  virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
-  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
-  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
-  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
-    llvm_unreachable("ELF doesn't support this directive");
-  }
-  virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
-                                unsigned ByteAlignment);
-  virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
-    llvm_unreachable("ELF doesn't support this directive");
-  }
-
-  virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
-    llvm_unreachable("ELF doesn't support this directive");
-  }
-
-  virtual void EmitCOFFSymbolType(int Type) {
-    llvm_unreachable("ELF doesn't support this directive");
-  }
-
-  virtual void EndCOFFSymbolDef() {
-    llvm_unreachable("ELF doesn't support this directive");
-  }
-
-  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
-     MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
-     SD.setSize(Value);
-  }
-
-  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
-                                     unsigned ByteAlignment);
-
-  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
-                            uint64_t Size = 0, unsigned ByteAlignment = 0) {
-    llvm_unreachable("ELF doesn't support this directive");
-  }
-  virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
-                              uint64_t Size, unsigned ByteAlignment = 0) {
-    llvm_unreachable("ELF doesn't support this directive");
-  }
-  virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
-                             unsigned AddrSpace);
-
-  virtual void EmitFileDirective(StringRef Filename);
-
-  virtual void EmitTCEntry(const MCSymbol &S);
-
-  virtual void FinishImpl();
-
-private:
-  virtual void EmitInstToFragment(const MCInst &Inst);
-  virtual void EmitInstToData(const MCInst &Inst);
+inline void MCELFStreamer::SetSection(StringRef Section, unsigned Type,
+                                      unsigned Flags, SectionKind Kind) {
+  SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
+}
 
-  void fixSymbolsInTLSFixups(const MCExpr *expr);
+inline void MCELFStreamer::SetSectionData() {
+  SetSection(".data",
+             ELF::SHT_PROGBITS,
+             ELF::SHF_WRITE | ELF::SHF_ALLOC,
+             SectionKind::getDataRel());
+  EmitCodeAlignment(4, 0);
+}
 
-  struct LocalCommon {
-    MCSymbolData *SD;
-    uint64_t Size;
-    unsigned ByteAlignment;
-  };
-  std::vector<LocalCommon> LocalCommons;
+inline void MCELFStreamer::SetSectionText() {
+  SetSection(".text",
+             ELF::SHT_PROGBITS,
+             ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
+             SectionKind::getText());
+  EmitCodeAlignment(4, 0);
+}
 
-  SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
-  /// @}
-  void SetSection(StringRef Section, unsigned Type, unsigned Flags,
-                  SectionKind Kind) {
-    SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
-  }
+inline void MCELFStreamer::SetSectionBss() {
+  SetSection(".bss",
+             ELF::SHT_NOBITS,
+             ELF::SHF_WRITE | ELF::SHF_ALLOC,
+             SectionKind::getBSS());
+  EmitCodeAlignment(4, 0);
+}
 
-  void SetSectionData() {
-    SetSection(".data", ELF::SHT_PROGBITS,
-               ELF::SHF_WRITE |ELF::SHF_ALLOC,
-               SectionKind::getDataRel());
-    EmitCodeAlignment(4, 0);
-  }
-  void SetSectionText() {
-    SetSection(".text", ELF::SHT_PROGBITS,
-               ELF::SHF_EXECINSTR |
-               ELF::SHF_ALLOC, SectionKind::getText());
-    EmitCodeAlignment(4, 0);
-  }
-  void SetSectionBss() {
-    SetSection(".bss", ELF::SHT_NOBITS,
-               ELF::SHF_WRITE |
-               ELF::SHF_ALLOC, SectionKind::getBSS());
-    EmitCodeAlignment(4, 0);
-  }
-};
+MCELFStreamer::~MCELFStreamer() {
 }
 
 void MCELFStreamer::InitSections() {
@@ -192,7 +112,7 @@ void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) {
   SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc);
 }
 
-void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+void MCELFStreamer::EmitAssignment(MCSymbol* Symbol, const MCExpr* Value) {
   // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
   // MCObjectStreamer.
   // FIXME: Lift context changes into super class.
@@ -341,6 +261,11 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
   SD.setSize(MCConstantExpr::Create(Size, getContext()));
 }
 
+void MCELFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+  SD.setSize(Value);
+}
+
 void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                           unsigned ByteAlignment) {
   // FIXME: Should this be caught and done earlier?
@@ -506,3 +431,33 @@ MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
     S->getAssembler().setNoExecStack(true);
   return S;
 }
+
+void MCELFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+  llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+  llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+  llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EmitCOFFSymbolType(int Type) {
+  llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EndCOFFSymbolDef() {
+  llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+                                 uint64_t Size, unsigned ByteAlignment) {
+  llvm_unreachable("ELF doesn't support this directive");
+}
+
+void MCELFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                                   uint64_t Size, unsigned ByteAlignment) {
+  llvm_unreachable("ELF doesn't support this directive");
+}
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index de2f375aab..7a63bace6c 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -209,6 +209,10 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
   case VK_PPC_GAS_LO16: return "l";
   case VK_PPC_TPREL16_HA: return "tprel@ha";
   case VK_PPC_TPREL16_LO: return "tprel@l";
+  case VK_PPC_TOC16_HA: return "toc@ha";
+  case VK_PPC_TOC16_LO: return "toc@l";
+  case VK_PPC_GOT_TPREL16_DS: return "got@tprel";
+  case VK_PPC_TLS: return "tls";
   case VK_Mips_GPREL: return "GPREL";
   case VK_Mips_GOT_CALL: return "GOT_CALL";
   case VK_Mips_GOT16: return "GOT16";
diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp
index 41d90abeeb..73f30ffb52 100644
--- a/lib/MC/MCInstPrinter.cpp
+++ b/lib/MC/MCInstPrinter.cpp
@@ -8,10 +8,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
@@ -50,3 +51,11 @@ StringRef MCInstPrinter::markup(StringRef a, StringRef b) const {
   else
     return b;
 }
+
+/// Utility function to print immediates in decimal or hex.
+format_object1<int64_t> MCInstPrinter::formatImm(const int64_t Value) const {
+  if (getPrintImmHex())
+    return format("0x%" PRIx64, Value);
+  else
+    return format("%" PRId64, Value);
+}
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 46579d7b1f..b29cc5b655 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -8,7 +8,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCStreamer.h"
-
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSectionMachO.h"
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 3338a17e5c..8da497220e 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -8,12 +8,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
-#include "llvm/ADT/Triple.h"
 using namespace llvm;
 
 void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
@@ -392,6 +392,10 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
   DwarfMacroInfoSection =
     Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0,
                        SectionKind::getMetadata());
+
+  // DWARF5 Experimental Debug Info
+
+  // Accelerator Tables
   DwarfAccelNamesSection =
     Ctx->getELFSection(".apple_names", ELF::SHT_PROGBITS, 0,
                        SectionKind::getMetadata());
@@ -404,6 +408,24 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
   DwarfAccelTypesSection =
     Ctx->getELFSection(".apple_types", ELF::SHT_PROGBITS, 0,
                        SectionKind::getMetadata());
+
+  // Fission Sections
+  DwarfInfoDWOSection =
+    Ctx->getELFSection(".debug_info.dwo", ELF::SHT_PROGBITS, 0,
+                       SectionKind::getMetadata());
+  DwarfAbbrevDWOSection =
+    Ctx->getELFSection(".debug_abbrev.dwo", ELF::SHT_PROGBITS, 0,
+                       SectionKind::getMetadata());
+  DwarfStrDWOSection =
+    Ctx->getELFSection(".debug_str.dwo", ELF::SHT_PROGBITS,
+                       ELF::SHF_MERGE | ELF::SHF_STRINGS,
+                       SectionKind::getMergeable1ByteCString());
+  DwarfLineDWOSection =
+    Ctx->getELFSection(".debug_line.dwo", ELF::SHT_PROGBITS, 0,
+                       SectionKind::getMetadata());
+  DwarfLocDWOSection =
+    Ctx->getELFSection(".debug_loc.dwo", ELF::SHT_PROGBITS, 0,
+                       SectionKind::getMetadata());
 }
 
 
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index f93f685bf5..d0492fd16c 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCParser/AsmLexer.h"
-#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
 #include <cctype>
 #include <cerrno>
 #include <cstdio>
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index ab554189ee..832e7cde7b 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -78,11 +78,14 @@ struct MacroInstantiation {
   /// The location of the instantiation.
   SMLoc InstantiationLoc;
 
+  /// The buffer where parsing should resume upon instantiation completion.
+  int ExitBuffer;
+
   /// The location where parsing should resume upon instantiation completion.
   SMLoc ExitLoc;
 
 public:
-  MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+  MacroInstantiation(const Macro *M, SMLoc IL, int EB, SMLoc EL,
                      MemoryBuffer *I);
 };
 
@@ -252,7 +255,10 @@ private:
   /// \brief Reset the current lexer position to that given by \p Loc. The
   /// current token is not set; clients should ensure Lex() is called
   /// subsequently.
-  void JumpToLoc(SMLoc Loc);
+  ///
+  /// \param InBuffer If not -1, should be the known buffer id that contains the
+  /// location.
+  void JumpToLoc(SMLoc Loc, int InBuffer=-1);
 
   virtual void EatToEndOfStatement();
 
@@ -563,8 +569,12 @@ bool AsmParser::ProcessIncbinFile(const std::string &Filename) {
   return false;
 }
 
-void AsmParser::JumpToLoc(SMLoc Loc) {
-  CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+void AsmParser::JumpToLoc(SMLoc Loc, int InBuffer) {
+  if (InBuffer != -1) {
+    CurBuffer = InBuffer;
+  } else {
+    CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+  }
   Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
 }
 
@@ -1660,9 +1670,11 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
   return false;
 }
 
-MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL,
+                                       int EB, SMLoc EL,
                                        MemoryBuffer *I)
-  : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitLoc(EL)
+  : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitBuffer(EB),
+    ExitLoc(EL)
 {
 }
 
@@ -1858,6 +1870,7 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
   // Create the macro instantiation object and add to the current macro
   // instantiation stack.
   MacroInstantiation *MI = new MacroInstantiation(M, NameLoc,
+                                                  CurBuffer,
                                                   getTok().getLoc(),
                                                   Instantiation);
   ActiveMacros.push_back(MI);
@@ -1872,7 +1885,7 @@ bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
 
 void AsmParser::HandleMacroExit() {
   // Jump to the EndOfStatement we should return to, and consume it.
-  JumpToLoc(ActiveMacros.back()->ExitLoc);
+  JumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer);
   Lex();
 
   // Pop the instantiation entry.
@@ -3552,6 +3565,7 @@ void AsmParser::InstantiateMacroLikeBody(Macro *M, SMLoc DirectiveLoc,
   // Create the macro instantiation object and add to the current macro
   // instantiation stack.
   MacroInstantiation *MI = new MacroInstantiation(M, DirectiveLoc,
+                                                  CurBuffer,
                                                   getTok().getLoc(),
                                                   Instantiation);
   ActiveMacros.push_back(MI);
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index c4cdc3c9f9..e7c564a243 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -12,11 +12,11 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCTargetAsmParser.h"
 #include "llvm/Support/COFF.h"
 using namespace llvm;
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 18033d05eb..20c949dbda 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -8,15 +8,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 using namespace llvm;
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index 6967feef24..a8b00cd84a 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -8,13 +8,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Twine.h"
 using namespace llvm;
 
 MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) {
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index a792d56317..ccf4a7dddf 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -8,8 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index cdfd5246f3..96d6d691d2 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -7,23 +7,24 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/LEB128.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cstdlib>
 using namespace llvm;
 
 MCStreamer::MCStreamer(MCContext &Ctx)
   : Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
-    CurrentW64UnwindInfo(0), LastSymbol(0) {
+    CurrentW64UnwindInfo(0), LastSymbol(0),
+    AutoInitSections(false) {
   const MCSection *section = NULL;
   SectionStack.push_back(std::make_pair(section, section));
 }
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index 80a1f02ce6..f18828dd41 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -8,10 +8,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index c05b4b17fc..c5b637c924 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -8,13 +8,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCWin64EH.h"
-#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCSectionCOFF.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
 
 namespace llvm {
 
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index a94b214022..cc8d2fb477 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -10,20 +10,19 @@
 #include "llvm/MC/MCMachObjectWriter.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCMachOSymbolFlags.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Object/MachOFormat.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-
 #include <vector>
 using namespace llvm;
 using namespace llvm::object;
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index f706cac8d3..01860c5d7f 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -13,28 +13,24 @@
 
 #define DEBUG_TYPE "WinCOFFObjectWriter"
 
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCWinCOFFObjectWriter.h"
-
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
 #include "llvm/Support/COFF.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-
 #include "llvm/Support/TimeValue.h"
-
 #include <cstdio>
 
 using namespace llvm;
diff --git a/lib/Makefile b/lib/Makefile
index c59d77d009..c44a6a40e4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -11,7 +11,7 @@ LEVEL = ..
 include $(LEVEL)/Makefile.config
 
 PARALLEL_DIRS := VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
-                Target ExecutionEngine Linker MC Object Wrap DebugInfo
+                Target ExecutionEngine Linker MC Object Option Wrap DebugInfo
 
 ifeq ($(NACL_SANDBOX),1)
   PARALLEL_DIRS := $(filter-out Archive Linker, \
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 5b1e007e85..dafcb72735 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -13,8 +13,8 @@
 
 #include "llvm/Object/Archive.h"
 #include "llvm/ADT/APInt.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
 
 using namespace llvm;
diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp
index 00dea3fe47..a64db1c60f 100644
--- a/lib/Object/MachOObject.cpp
+++ b/lib/Object/MachOObject.cpp
@@ -8,14 +8,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Object/MachOObject.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 using namespace llvm::object;
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 45aeaac6b8..da7615714e 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -12,12 +12,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/Triple.h"
 #include "llvm/Object/MachO.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Object/MachOFormat.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MemoryBuffer.h"
-
 #include <cctype>
 #include <cstring>
 #include <limits>
@@ -50,7 +49,15 @@ ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
   MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err);
   if (!MachOObj)
     return NULL;
-  return new MachOObjectFile(Buffer, MachOObj, ec);
+  // MachOObject takes ownership of the Buffer we passed to it, and
+  // MachOObjectFile does, too, so we need to make sure they don't get the
+  // same object. A MemoryBuffer is cheap (it's just a reference to memory,
+  // not a copy of the memory itself), so just make a new copy here for
+  // the MachOObjectFile.
+  MemoryBuffer *NewBuffer =
+    MemoryBuffer::getMemBuffer(Buffer->getBuffer(),
+                               Buffer->getBufferIdentifier(), false);
+  return new MachOObjectFile(NewBuffer, MachOObj, ec);
 }
 
 /*===-- Symbols -----------------------------------------------------------===*/
@@ -474,7 +481,7 @@ error_code MachOObjectFile::getSectionName(DataRefImpl DRI,
                                            StringRef &Result) const {
   // FIXME: thread safety.
   static char result[34];
-  if (is64BitLoadCommand(MachOObj, DRI)) {
+  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
     InMemoryStruct<macho::Segment64LoadCommand> SLC;
     LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
     MachOObj->ReadSegment64LoadCommand(LCI, SLC);
@@ -501,7 +508,7 @@ error_code MachOObjectFile::getSectionName(DataRefImpl DRI,
 
 error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI,
                                               uint64_t &Result) const {
-  if (is64BitLoadCommand(MachOObj, DRI)) {
+  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
     InMemoryStruct<macho::Section64> Sect;
     getSection64(DRI, Sect);
     Result = Sect->Address;
@@ -515,7 +522,7 @@ error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI,
 
 error_code MachOObjectFile::getSectionSize(DataRefImpl DRI,
                                            uint64_t &Result) const {
-  if (is64BitLoadCommand(MachOObj, DRI)) {
+  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
     InMemoryStruct<macho::Section64> Sect;
     getSection64(DRI, Sect);
     Result = Sect->Size;
@@ -529,7 +536,7 @@ error_code MachOObjectFile::getSectionSize(DataRefImpl DRI,
 
 error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
                                                StringRef &Result) const {
-  if (is64BitLoadCommand(MachOObj, DRI)) {
+  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
     InMemoryStruct<macho::Section64> Sect;
     getSection64(DRI, Sect);
     Result = MachOObj->getData(Sect->Offset, Sect->Size);
@@ -543,7 +550,7 @@ error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
 
 error_code MachOObjectFile::getSectionAlignment(DataRefImpl DRI,
                                                 uint64_t &Result) const {
-  if (is64BitLoadCommand(MachOObj, DRI)) {
+  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
     InMemoryStruct<macho::Section64> Sect;
     getSection64(DRI, Sect);
     Result = uint64_t(1) << Sect->Align;
@@ -557,7 +564,7 @@ error_code MachOObjectFile::getSectionAlignment(DataRefImpl DRI,
 
 error_code MachOObjectFile::isSectionText(DataRefImpl DRI,
                                           bool &Result) const {
-  if (is64BitLoadCommand(MachOObj, DRI)) {
+  if (is64BitLoadCommand(MachOObj.get(), DRI)) {
     InMemoryStruct<macho::Section64> Sect;
     getSection64(DRI, Sect);
     Result = !strcmp(Sect->Name, "__text");
@@ -664,7 +671,7 @@ relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const {
 }
 relocation_iterator MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const {
   uint32_t last_reloc;
-  if (is64BitLoadCommand(MachOObj, Sec)) {
+  if (is64BitLoadCommand(MachOObj.get(), Sec)) {
     InMemoryStruct<macho::Section64> Sect;
     getSection64(Sec, Sect);
     last_reloc = Sect->NumRelocationTableEntries;
diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp
new file mode 100644
index 0000000000..9f547bd825
--- /dev/null
+++ b/lib/Option/Arg.cpp
@@ -0,0 +1,123 @@
+//===--- Arg.cpp - Argument Implementations -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/Arg.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::opt;
+
+Arg::Arg(const Option _Opt, StringRef S, unsigned _Index, const Arg *_BaseArg)
+  : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
+    Claimed(false), OwnsValues(false) {
+}
+
+Arg::Arg(const Option _Opt, StringRef S, unsigned _Index,
+         const char *Value0, const Arg *_BaseArg)
+  : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
+    Claimed(false), OwnsValues(false) {
+  Values.push_back(Value0);
+}
+
+Arg::Arg(const Option _Opt, StringRef S, unsigned _Index,
+         const char *Value0, const char *Value1, const Arg *_BaseArg)
+  : Opt(_Opt), BaseArg(_BaseArg), Spelling(S), Index(_Index),
+    Claimed(false), OwnsValues(false) {
+  Values.push_back(Value0);
+  Values.push_back(Value1);
+}
+
+Arg::~Arg() {
+  if (OwnsValues) {
+    for (unsigned i = 0, e = Values.size(); i != e; ++i)
+      delete[] Values[i];
+  }
+}
+
+void Arg::dump() const {
+  llvm::errs() << "<";
+
+  llvm::errs() << " Opt:";
+  Opt.dump();
+
+  llvm::errs() << " Index:" << Index;
+
+  llvm::errs() << " Values: [";
+  for (unsigned i = 0, e = Values.size(); i != e; ++i) {
+    if (i) llvm::errs() << ", ";
+    llvm::errs() << "'" << Values[i] << "'";
+  }
+
+  llvm::errs() << "]>\n";
+}
+
+std::string Arg::getAsString(const ArgList &Args) const {
+  SmallString<256> Res;
+  llvm::raw_svector_ostream OS(Res);
+
+  ArgStringList ASL;
+  render(Args, ASL);
+  for (ArgStringList::iterator
+         it = ASL.begin(), ie = ASL.end(); it != ie; ++it) {
+    if (it != ASL.begin())
+      OS << ' ';
+    OS << *it;
+  }
+
+  return OS.str();
+}
+
+void Arg::renderAsInput(const ArgList &Args, ArgStringList &Output) const {
+  if (!getOption().hasNoOptAsInput()) {
+    render(Args, Output);
+    return;
+  }
+
+  for (unsigned i = 0, e = getNumValues(); i != e; ++i)
+    Output.push_back(getValue(i));
+}
+
+void Arg::render(const ArgList &Args, ArgStringList &Output) const {
+  switch (getOption().getRenderStyle()) {
+  case Option::RenderValuesStyle:
+    for (unsigned i = 0, e = getNumValues(); i != e; ++i)
+      Output.push_back(getValue(i));
+    break;
+
+  case Option::RenderCommaJoinedStyle: {
+    SmallString<256> Res;
+    llvm::raw_svector_ostream OS(Res);
+    OS << getSpelling();
+    for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+      if (i) OS << ',';
+      OS << getValue(i);
+    }
+    Output.push_back(Args.MakeArgString(OS.str()));
+    break;
+  }
+
+ case Option::RenderJoinedStyle:
+    Output.push_back(Args.GetOrMakeJoinedArgString(
+                       getIndex(), getSpelling(), getValue(0)));
+    for (unsigned i = 1, e = getNumValues(); i != e; ++i)
+      Output.push_back(getValue(i));
+    break;
+
+  case Option::RenderSeparateStyle:
+    Output.push_back(Args.MakeArgString(getSpelling()));
+    for (unsigned i = 0, e = getNumValues(); i != e; ++i)
+      Output.push_back(getValue(i));
+    break;
+  }
+}
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
new file mode 100644
index 0000000000..65cb51959e
--- /dev/null
+++ b/lib/Option/ArgList.cpp
@@ -0,0 +1,386 @@
+//===--- ArgList.cpp - Argument List Management ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/ArgList.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::opt;
+
+void arg_iterator::SkipToNextArg() {
+  for (; Current != Args.end(); ++Current) {
+    // Done if there are no filters.
+    if (!Id0.isValid())
+      break;
+
+    // Otherwise require a match.
+    const Option &O = (*Current)->getOption();
+    if (O.matches(Id0) ||
+        (Id1.isValid() && O.matches(Id1)) ||
+        (Id2.isValid() && O.matches(Id2)))
+      break;
+  }
+}
+
+//
+
+ArgList::ArgList() {
+}
+
+ArgList::~ArgList() {
+}
+
+void ArgList::append(Arg *A) {
+  Args.push_back(A);
+}
+
+void ArgList::eraseArg(OptSpecifier Id) {
+  for (iterator it = begin(), ie = end(); it != ie; ) {
+    if ((*it)->getOption().matches(Id)) {
+      it = Args.erase(it);
+      ie = end();
+    } else {
+      ++it;
+    }
+  }
+}
+
+Arg *ArgList::getLastArgNoClaim(OptSpecifier Id) const {
+  // FIXME: Make search efficient?
+  for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
+    if ((*it)->getOption().matches(Id))
+      return *it;
+  return 0;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id) const {
+  Arg *Res = 0;
+  for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+    if ((*it)->getOption().matches(Id)) {
+      Res = *it;
+      Res->claim();
+    }
+  }
+
+  return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1) const {
+  Arg *Res = 0;
+  for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+    if ((*it)->getOption().matches(Id0) ||
+        (*it)->getOption().matches(Id1)) {
+      Res = *it;
+      Res->claim();
+
+    }
+  }
+
+  return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+                         OptSpecifier Id2) const {
+  Arg *Res = 0;
+  for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+    if ((*it)->getOption().matches(Id0) ||
+        (*it)->getOption().matches(Id1) ||
+        (*it)->getOption().matches(Id2)) {
+      Res = *it;
+      Res->claim();
+    }
+  }
+
+  return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+                         OptSpecifier Id2, OptSpecifier Id3) const {
+  Arg *Res = 0;
+  for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+    if ((*it)->getOption().matches(Id0) ||
+        (*it)->getOption().matches(Id1) ||
+        (*it)->getOption().matches(Id2) ||
+        (*it)->getOption().matches(Id3)) {
+      Res = *it;
+      Res->claim();
+    }
+  }
+
+  return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+                         OptSpecifier Id2, OptSpecifier Id3,
+                         OptSpecifier Id4) const {
+  Arg *Res = 0;
+  for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+    if ((*it)->getOption().matches(Id0) ||
+        (*it)->getOption().matches(Id1) ||
+        (*it)->getOption().matches(Id2) ||
+        (*it)->getOption().matches(Id3) ||
+        (*it)->getOption().matches(Id4)) {
+      Res = *it;
+      Res->claim();
+    }
+  }
+
+  return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+                         OptSpecifier Id2, OptSpecifier Id3,
+                         OptSpecifier Id4, OptSpecifier Id5) const {
+  Arg *Res = 0;
+  for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+    if ((*it)->getOption().matches(Id0) ||
+        (*it)->getOption().matches(Id1) ||
+        (*it)->getOption().matches(Id2) ||
+        (*it)->getOption().matches(Id3) ||
+        (*it)->getOption().matches(Id4) ||
+        (*it)->getOption().matches(Id5)) {
+      Res = *it;
+      Res->claim();
+    }
+  }
+
+  return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+                         OptSpecifier Id2, OptSpecifier Id3,
+                         OptSpecifier Id4, OptSpecifier Id5,
+                         OptSpecifier Id6) const {
+  Arg *Res = 0;
+  for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+    if ((*it)->getOption().matches(Id0) ||
+        (*it)->getOption().matches(Id1) ||
+        (*it)->getOption().matches(Id2) ||
+        (*it)->getOption().matches(Id3) ||
+        (*it)->getOption().matches(Id4) ||
+        (*it)->getOption().matches(Id5) ||
+        (*it)->getOption().matches(Id6)) {
+      Res = *it;
+      Res->claim();
+    }
+  }
+
+  return Res;
+}
+
+Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
+                         OptSpecifier Id2, OptSpecifier Id3,
+                         OptSpecifier Id4, OptSpecifier Id5,
+                         OptSpecifier Id6, OptSpecifier Id7) const {
+  Arg *Res = 0;
+  for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
+    if ((*it)->getOption().matches(Id0) ||
+        (*it)->getOption().matches(Id1) ||
+        (*it)->getOption().matches(Id2) ||
+        (*it)->getOption().matches(Id3) ||
+        (*it)->getOption().matches(Id4) ||
+        (*it)->getOption().matches(Id5) ||
+        (*it)->getOption().matches(Id6) ||
+        (*it)->getOption().matches(Id7)) {
+      Res = *it;
+      Res->claim();
+    }
+  }
+
+  return Res;
+}
+
+bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const {
+  if (Arg *A = getLastArg(Pos, Neg))
+    return A->getOption().matches(Pos);
+  return Default;
+}
+
+StringRef ArgList::getLastArgValue(OptSpecifier Id,
+                                         StringRef Default) const {
+  if (Arg *A = getLastArg(Id))
+    return A->getValue();
+  return Default;
+}
+
+std::vector<std::string> ArgList::getAllArgValues(OptSpecifier Id) const {
+  SmallVector<const char *, 16> Values;
+  AddAllArgValues(Values, Id);
+  return std::vector<std::string>(Values.begin(), Values.end());
+}
+
+void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id) const {
+  if (Arg *A = getLastArg(Id)) {
+    A->claim();
+    A->render(*this, Output);
+  }
+}
+
+void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0,
+                         OptSpecifier Id1, OptSpecifier Id2) const {
+  for (arg_iterator it = filtered_begin(Id0, Id1, Id2),
+         ie = filtered_end(); it != ie; ++it) {
+    (*it)->claim();
+    (*it)->render(*this, Output);
+  }
+}
+
+void ArgList::AddAllArgValues(ArgStringList &Output, OptSpecifier Id0,
+                              OptSpecifier Id1, OptSpecifier Id2) const {
+  for (arg_iterator it = filtered_begin(Id0, Id1, Id2),
+         ie = filtered_end(); it != ie; ++it) {
+    (*it)->claim();
+    for (unsigned i = 0, e = (*it)->getNumValues(); i != e; ++i)
+      Output.push_back((*it)->getValue(i));
+  }
+}
+
+void ArgList::AddAllArgsTranslated(ArgStringList &Output, OptSpecifier Id0,
+                                   const char *Translation,
+                                   bool Joined) const {
+  for (arg_iterator it = filtered_begin(Id0),
+         ie = filtered_end(); it != ie; ++it) {
+    (*it)->claim();
+
+    if (Joined) {
+      Output.push_back(MakeArgString(StringRef(Translation) +
+                                     (*it)->getValue(0)));
+    } else {
+      Output.push_back(Translation);
+      Output.push_back((*it)->getValue(0));
+    }
+  }
+}
+
+void ArgList::ClaimAllArgs(OptSpecifier Id0) const {
+  for (arg_iterator it = filtered_begin(Id0),
+         ie = filtered_end(); it != ie; ++it)
+    (*it)->claim();
+}
+
+void ArgList::ClaimAllArgs() const {
+  for (const_iterator it = begin(), ie = end(); it != ie; ++it)
+    if (!(*it)->isClaimed())
+      (*it)->claim();
+}
+
+const char *ArgList::MakeArgString(const Twine &T) const {
+  SmallString<256> Str;
+  T.toVector(Str);
+  return MakeArgString(Str.str());
+}
+
+const char *ArgList::GetOrMakeJoinedArgString(unsigned Index,
+                                              StringRef LHS,
+                                              StringRef RHS) const {
+  StringRef Cur = getArgString(Index);
+  if (Cur.size() == LHS.size() + RHS.size() &&
+      Cur.startswith(LHS) && Cur.endswith(RHS))
+    return Cur.data();
+
+  return MakeArgString(LHS + RHS);
+}
+
+//
+
+InputArgList::InputArgList(const char* const *ArgBegin,
+                           const char* const *ArgEnd)
+  : NumInputArgStrings(ArgEnd - ArgBegin) {
+  ArgStrings.append(ArgBegin, ArgEnd);
+}
+
+InputArgList::~InputArgList() {
+  // An InputArgList always owns its arguments.
+  for (iterator it = begin(), ie = end(); it != ie; ++it)
+    delete *it;
+}
+
+unsigned InputArgList::MakeIndex(StringRef String0) const {
+  unsigned Index = ArgStrings.size();
+
+  // Tuck away so we have a reliable const char *.
+  SynthesizedStrings.push_back(String0);
+  ArgStrings.push_back(SynthesizedStrings.back().c_str());
+
+  return Index;
+}
+
+unsigned InputArgList::MakeIndex(StringRef String0,
+                                 StringRef String1) const {
+  unsigned Index0 = MakeIndex(String0);
+  unsigned Index1 = MakeIndex(String1);
+  assert(Index0 + 1 == Index1 && "Unexpected non-consecutive indices!");
+  (void) Index1;
+  return Index0;
+}
+
+const char *InputArgList::MakeArgString(StringRef Str) const {
+  return getArgString(MakeIndex(Str));
+}
+
+//
+
+DerivedArgList::DerivedArgList(const InputArgList &_BaseArgs)
+  : BaseArgs(_BaseArgs) {
+}
+
+DerivedArgList::~DerivedArgList() {
+  // We only own the arguments we explicitly synthesized.
+  for (iterator it = SynthesizedArgs.begin(), ie = SynthesizedArgs.end();
+       it != ie; ++it)
+    delete *it;
+}
+
+const char *DerivedArgList::MakeArgString(StringRef Str) const {
+  return BaseArgs.MakeArgString(Str);
+}
+
+Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const {
+  Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
+                                               Twine(Opt.getName())),
+                   BaseArgs.MakeIndex(Opt.getName()), BaseArg);
+  SynthesizedArgs.push_back(A);
+  return A;
+}
+
+Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt,
+                                       StringRef Value) const {
+  unsigned Index = BaseArgs.MakeIndex(Value);
+  Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
+                                               Twine(Opt.getName())),
+                   Index, BaseArgs.getArgString(Index), BaseArg);
+  SynthesizedArgs.push_back(A);
+  return A;
+}
+
+Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt,
+                                     StringRef Value) const {
+  unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value);
+  Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
+                                               Twine(Opt.getName())),
+                   Index, BaseArgs.getArgString(Index + 1), BaseArg);
+  SynthesizedArgs.push_back(A);
+  return A;
+}
+
+Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt,
+                                   StringRef Value) const {
+  unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str());
+  Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
+                                               Twine(Opt.getName())), Index,
+                   BaseArgs.getArgString(Index) + Opt.getName().size(),
+                   BaseArg);
+  SynthesizedArgs.push_back(A);
+  return A;
+}
diff --git a/lib/Option/CMakeLists.txt b/lib/Option/CMakeLists.txt
new file mode 100644
index 0000000000..2e7acc27a5
--- /dev/null
+++ b/lib/Option/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_llvm_library(LLVMOption
+  Arg.cpp
+  ArgList.cpp
+  Option.cpp
+  OptTable.cpp
+  )
+
+target_link_libraries(LLVMOption LLVMSupport)
diff --git a/lib/Option/LLVMBuild.txt b/lib/Option/LLVMBuild.txt
new file mode 100644
index 0000000000..0b78cf20c0
--- /dev/null
+++ b/lib/Option/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/Option/LLVMBuild.txt -------------------------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Option
+parent = Libraries
+required_libraries = Support
diff --git a/lib/Option/Makefile b/lib/Option/Makefile
new file mode 100644
index 0000000000..255d0796e2
--- /dev/null
+++ b/lib/Option/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Option/Makefile ---------------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMOption
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
new file mode 100644
index 0000000000..3b34bf3b3a
--- /dev/null
+++ b/lib/Option/OptTable.cpp
@@ -0,0 +1,388 @@
+//===--- OptTable.cpp - Option Table Implementation -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/OptTable.h"
+
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <map>
+
+using namespace llvm;
+using namespace llvm::opt;
+
+// Ordering on Info. The ordering is *almost* lexicographic, with two
+// exceptions. First, '\0' comes at the end of the alphabet instead of
+// the beginning (thus options precede any other options which prefix
+// them). Second, for options with the same name, the less permissive
+// version should come first; a Flag option should precede a Joined
+// option, for example.
+
+static int StrCmpOptionName(const char *A, const char *B) {
+  char a = *A, b = *B;
+  while (a == b) {
+    if (a == '\0')
+      return 0;
+
+    a = *++A;
+    b = *++B;
+  }
+
+  if (a == '\0') // A is a prefix of B.
+    return 1;
+  if (b == '\0') // B is a prefix of A.
+    return -1;
+
+  // Otherwise lexicographic.
+  return (a < b) ? -1 : 1;
+}
+
+namespace llvm {
+namespace opt {
+
+static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
+  if (&A == &B)
+    return false;
+
+  if (int N = StrCmpOptionName(A.Name, B.Name))
+    return N == -1;
+
+  for (const char * const *APre = A.Prefixes,
+                  * const *BPre = B.Prefixes;
+                          *APre != 0 && *BPre != 0; ++APre, ++BPre) {
+    if (int N = StrCmpOptionName(*APre, *BPre))
+      return N == -1;
+  }
+
+  // Names are the same, check that classes are in order; exactly one
+  // should be joined, and it should succeed the other.
+  assert(((A.Kind == Option::JoinedClass) ^ (B.Kind == Option::JoinedClass)) &&
+         "Unexpected classes for options with same name.");
+  return B.Kind == Option::JoinedClass;
+}
+
+// Support lower_bound between info and an option name.
+static inline bool operator<(const OptTable::Info &I, const char *Name) {
+  return StrCmpOptionName(I.Name, Name) == -1;
+}
+static inline bool operator<(const char *Name, const OptTable::Info &I) {
+  return StrCmpOptionName(Name, I.Name) == -1;
+}
+}
+}
+
+OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {}
+
+OptTable::OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos)
+  : OptionInfos(_OptionInfos),
+    NumOptionInfos(_NumOptionInfos),
+    TheInputOptionID(0),
+    TheUnknownOptionID(0),
+    FirstSearchableIndex(0)
+{
+  // Explicitly zero initialize the error to work around a bug in array
+  // value-initialization on MinGW with gcc 4.3.5.
+
+  // Find start of normal options.
+  for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+    unsigned Kind = getInfo(i + 1).Kind;
+    if (Kind == Option::InputClass) {
+      assert(!TheInputOptionID && "Cannot have multiple input options!");
+      TheInputOptionID = getInfo(i + 1).ID;
+    } else if (Kind == Option::UnknownClass) {
+      assert(!TheUnknownOptionID && "Cannot have multiple unknown options!");
+      TheUnknownOptionID = getInfo(i + 1).ID;
+    } else if (Kind != Option::GroupClass) {
+      FirstSearchableIndex = i;
+      break;
+    }
+  }
+  assert(FirstSearchableIndex != 0 && "No searchable options?");
+
+#ifndef NDEBUG
+  // Check that everything after the first searchable option is a
+  // regular option class.
+  for (unsigned i = FirstSearchableIndex, e = getNumOptions(); i != e; ++i) {
+    Option::OptionClass Kind = (Option::OptionClass) getInfo(i + 1).Kind;
+    assert((Kind != Option::InputClass && Kind != Option::UnknownClass &&
+            Kind != Option::GroupClass) &&
+           "Special options should be defined first!");
+  }
+
+  // Check that options are in order.
+  for (unsigned i = FirstSearchableIndex + 1, e = getNumOptions(); i != e; ++i){
+    if (!(getInfo(i) < getInfo(i + 1))) {
+      getOption(i).dump();
+      getOption(i + 1).dump();
+      llvm_unreachable("Options are not in order!");
+    }
+  }
+#endif
+
+  // Build prefixes.
+  for (unsigned i = FirstSearchableIndex + 1, e = getNumOptions() + 1;
+                i != e; ++i) {
+    if (const char *const *P = getInfo(i).Prefixes) {
+      for (; *P != 0; ++P) {
+        PrefixesUnion.insert(*P);
+      }
+    }
+  }
+
+  // Build prefix chars.
+  for (llvm::StringSet<>::const_iterator I = PrefixesUnion.begin(),
+                                         E = PrefixesUnion.end(); I != E; ++I) {
+    StringRef Prefix = I->getKey();
+    for (StringRef::const_iterator C = Prefix.begin(), CE = Prefix.end();
+                                   C != CE; ++C)
+      if (std::find(PrefixChars.begin(), PrefixChars.end(), *C)
+            == PrefixChars.end())
+        PrefixChars.push_back(*C);
+  }
+}
+
+OptTable::~OptTable() {
+}
+
+const Option OptTable::getOption(OptSpecifier Opt) const {
+  unsigned id = Opt.getID();
+  if (id == 0)
+    return Option(0, 0);
+  assert((unsigned) (id - 1) < getNumOptions() && "Invalid ID.");
+  return Option(&getInfo(id), this);
+}
+
+bool OptTable::isOptionHelpHidden(OptSpecifier id) const {
+  return getInfo(id).Flags & HelpHidden;
+}
+
+static bool isInput(const llvm::StringSet<> &Prefixes, StringRef Arg) {
+  if (Arg == "-")
+    return true;
+  for (llvm::StringSet<>::const_iterator I = Prefixes.begin(),
+                                         E = Prefixes.end(); I != E; ++I)
+    if (Arg.startswith(I->getKey()))
+      return false;
+  return true;
+}
+
+/// \returns Matched size. 0 means no match.
+static unsigned matchOption(const OptTable::Info *I, StringRef Str) {
+  for (const char * const *Pre = I->Prefixes; *Pre != 0; ++Pre) {
+    StringRef Prefix(*Pre);
+    if (Str.startswith(Prefix) && Str.substr(Prefix.size()).startswith(I->Name))
+      return Prefix.size() + StringRef(I->Name).size();
+  }
+  return 0;
+}
+
+Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index) const {
+  unsigned Prev = Index;
+  const char *Str = Args.getArgString(Index);
+
+  // Anything that doesn't start with PrefixesUnion is an input, as is '-'
+  // itself.
+  if (isInput(PrefixesUnion, Str))
+    return new Arg(getOption(TheInputOptionID), Str, Index++, Str);
+
+  const Info *Start = OptionInfos + FirstSearchableIndex;
+  const Info *End = OptionInfos + getNumOptions();
+  StringRef Name = StringRef(Str).ltrim(PrefixChars);
+
+  // Search for the first next option which could be a prefix.
+  Start = std::lower_bound(Start, End, Name.data());
+
+  // Options are stored in sorted order, with '\0' at the end of the
+  // alphabet. Since the only options which can accept a string must
+  // prefix it, we iteratively search for the next option which could
+  // be a prefix.
+  //
+  // FIXME: This is searching much more than necessary, but I am
+  // blanking on the simplest way to make it fast. We can solve this
+  // problem when we move to TableGen.
+  for (; Start != End; ++Start) {
+    unsigned ArgSize = 0;
+    // Scan for first option which is a proper prefix.
+    for (; Start != End; ++Start)
+      if ((ArgSize = matchOption(Start, Str)))
+        break;
+    if (Start == End)
+      break;
+
+    // See if this option matches.
+    if (Arg *A = Option(Start, this).accept(Args, Index, ArgSize))
+      return A;
+
+    // Otherwise, see if this argument was missing values.
+    if (Prev != Index)
+      return 0;
+  }
+
+  return new Arg(getOption(TheUnknownOptionID), Str, Index++, Str);
+}
+
+InputArgList *OptTable::ParseArgs(const char* const *ArgBegin,
+                                  const char* const *ArgEnd,
+                                  unsigned &MissingArgIndex,
+                                  unsigned &MissingArgCount) const {
+  InputArgList *Args = new InputArgList(ArgBegin, ArgEnd);
+
+  // FIXME: Handle '@' args (or at least error on them).
+
+  MissingArgIndex = MissingArgCount = 0;
+  unsigned Index = 0, End = ArgEnd - ArgBegin;
+  while (Index < End) {
+    // Ignore empty arguments (other things may still take them as arguments).
+    if (Args->getArgString(Index)[0] == '\0') {
+      ++Index;
+      continue;
+    }
+
+    unsigned Prev = Index;
+    Arg *A = ParseOneArg(*Args, Index);
+    assert(Index > Prev && "Parser failed to consume argument.");
+
+    // Check for missing argument error.
+    if (!A) {
+      assert(Index >= End && "Unexpected parser error.");
+      assert(Index - Prev - 1 && "No missing arguments!");
+      MissingArgIndex = Prev;
+      MissingArgCount = Index - Prev - 1;
+      break;
+    }
+
+    Args->append(A);
+  }
+
+  return Args;
+}
+
+static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) {
+  const Option O = Opts.getOption(Id);
+  std::string Name = O.getPrefixedName();
+
+  // Add metavar, if used.
+  switch (O.getKind()) {
+  case Option::GroupClass: case Option::InputClass: case Option::UnknownClass:
+    llvm_unreachable("Invalid option with help text.");
+
+  case Option::MultiArgClass:
+    llvm_unreachable("Cannot print metavar for this kind of option.");
+
+  case Option::FlagClass:
+    break;
+
+  case Option::SeparateClass: case Option::JoinedOrSeparateClass:
+    Name += ' ';
+    // FALLTHROUGH
+  case Option::JoinedClass: case Option::CommaJoinedClass:
+  case Option::JoinedAndSeparateClass:
+    if (const char *MetaVarName = Opts.getOptionMetaVar(Id))
+      Name += MetaVarName;
+    else
+      Name += "<value>";
+    break;
+  }
+
+  return Name;
+}
+
+static void PrintHelpOptionList(raw_ostream &OS, StringRef Title,
+                                std::vector<std::pair<std::string,
+                                const char*> > &OptionHelp) {
+  OS << Title << ":\n";
+
+  // Find the maximum option length.
+  unsigned OptionFieldWidth = 0;
+  for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) {
+    // Skip titles.
+    if (!OptionHelp[i].second)
+      continue;
+
+    // Limit the amount of padding we are willing to give up for alignment.
+    unsigned Length = OptionHelp[i].first.size();
+    if (Length <= 23)
+      OptionFieldWidth = std::max(OptionFieldWidth, Length);
+  }
+
+  const unsigned InitialPad = 2;
+  for (unsigned i = 0, e = OptionHelp.size(); i != e; ++i) {
+    const std::string &Option = OptionHelp[i].first;
+    int Pad = OptionFieldWidth - int(Option.size());
+    OS.indent(InitialPad) << Option;
+
+    // Break on long option names.
+    if (Pad < 0) {
+      OS << "\n";
+      Pad = OptionFieldWidth + InitialPad;
+    }
+    OS.indent(Pad + 1) << OptionHelp[i].second << '\n';
+  }
+}
+
+static const char *getOptionHelpGroup(const OptTable &Opts, OptSpecifier Id) {
+  unsigned GroupID = Opts.getOptionGroupID(Id);
+
+  // If not in a group, return the default help group.
+  if (!GroupID)
+    return "OPTIONS";
+
+  // Abuse the help text of the option groups to store the "help group"
+  // name.
+  //
+  // FIXME: Split out option groups.
+  if (const char *GroupHelp = Opts.getOptionHelpText(GroupID))
+    return GroupHelp;
+
+  // Otherwise keep looking.
+  return getOptionHelpGroup(Opts, GroupID);
+}
+
+void OptTable::PrintHelp(raw_ostream &OS, const char *Name,
+                         const char *Title, bool ShowHidden) const {
+  OS << "OVERVIEW: " << Title << "\n";
+  OS << '\n';
+  OS << "USAGE: " << Name << " [options] <inputs>\n";
+  OS << '\n';
+
+  // Render help text into a map of group-name to a list of (option, help)
+  // pairs.
+  typedef std::map<std::string,
+                 std::vector<std::pair<std::string, const char*> > > helpmap_ty;
+  helpmap_ty GroupedOptionHelp;
+
+  for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
+    unsigned Id = i + 1;
+
+    // FIXME: Split out option groups.
+    if (getOptionKind(Id) == Option::GroupClass)
+      continue;
+
+    if (!ShowHidden && isOptionHelpHidden(Id))
+      continue;
+
+    if (const char *Text = getOptionHelpText(Id)) {
+      const char *HelpGroup = getOptionHelpGroup(*this, Id);
+      const std::string &OptName = getOptionHelpName(*this, Id);
+      GroupedOptionHelp[HelpGroup].push_back(std::make_pair(OptName, Text));
+    }
+  }
+
+  for (helpmap_ty::iterator it = GroupedOptionHelp .begin(),
+         ie = GroupedOptionHelp.end(); it != ie; ++it) {
+    if (it != GroupedOptionHelp .begin())
+      OS << "\n";
+    PrintHelpOptionList(OS, it->first, it->second);
+  }
+
+  OS.flush();
+}
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp
new file mode 100644
index 0000000000..003f07f037
--- /dev/null
+++ b/lib/Option/Option.cpp
@@ -0,0 +1,204 @@
+//===--- Option.cpp - Abstract Driver Options -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/Option.h"
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include <algorithm>
+#include <cassert>
+
+using namespace llvm;
+using namespace llvm::opt;
+
+Option::Option(const OptTable::Info *info, const OptTable *owner)
+  : Info(info), Owner(owner) {
+
+  // Multi-level aliases are not supported, and alias options cannot
+  // have groups. This just simplifies option tracking, it is not an
+  // inherent limitation.
+  assert((!Info || !getAlias().isValid() || (!getAlias().getAlias().isValid() &&
+         !getGroup().isValid())) &&
+         "Multi-level aliases and aliases with groups are unsupported.");
+}
+
+Option::~Option() {
+}
+
+void Option::dump() const {
+  llvm::errs() << "<";
+  switch (getKind()) {
+#define P(N) case N: llvm::errs() << #N; break
+    P(GroupClass);
+    P(InputClass);
+    P(UnknownClass);
+    P(FlagClass);
+    P(JoinedClass);
+    P(SeparateClass);
+    P(CommaJoinedClass);
+    P(MultiArgClass);
+    P(JoinedOrSeparateClass);
+    P(JoinedAndSeparateClass);
+#undef P
+  }
+
+  llvm::errs() << " Prefixes:[";
+  for (const char * const *Pre = Info->Prefixes; *Pre != 0; ++Pre) {
+    llvm::errs() << '"' << *Pre << (*(Pre + 1) == 0 ? "\"" : "\", ");
+  }
+  llvm::errs() << ']';
+
+  llvm::errs() << " Name:\"" << getName() << '"';
+
+  const Option Group = getGroup();
+  if (Group.isValid()) {
+    llvm::errs() << " Group:";
+    Group.dump();
+  }
+
+  const Option Alias = getAlias();
+  if (Alias.isValid()) {
+    llvm::errs() << " Alias:";
+    Alias.dump();
+  }
+
+  if (getKind() == MultiArgClass)
+    llvm::errs() << " NumArgs:" << getNumArgs();
+
+  llvm::errs() << ">\n";
+}
+
+bool Option::matches(OptSpecifier Opt) const {
+  // Aliases are never considered in matching, look through them.
+  const Option Alias = getAlias();
+  if (Alias.isValid())
+    return Alias.matches(Opt);
+
+  // Check exact match.
+  if (getID() == Opt.getID())
+    return true;
+
+  const Option Group = getGroup();
+  if (Group.isValid())
+    return Group.matches(Opt);
+  return false;
+}
+
+Arg *Option::accept(const ArgList &Args,
+                    unsigned &Index,
+                    unsigned ArgSize) const {
+  const Option &UnaliasedOption = getUnaliasedOption();
+  StringRef Spelling;
+  // If the option was an alias, get the spelling from the unaliased one.
+  if (getID() == UnaliasedOption.getID()) {
+    Spelling = StringRef(Args.getArgString(Index), ArgSize);
+  } else {
+    Spelling = Args.MakeArgString(Twine(UnaliasedOption.getPrefix()) +
+                                  Twine(UnaliasedOption.getName()));
+  }
+
+  switch (getKind()) {
+  case FlagClass:
+    if (ArgSize != strlen(Args.getArgString(Index)))
+      return 0;
+
+    return new Arg(UnaliasedOption, Spelling, Index++);
+  case JoinedClass: {
+    const char *Value = Args.getArgString(Index) + ArgSize;
+    return new Arg(UnaliasedOption, Spelling, Index++, Value);
+  }
+  case CommaJoinedClass: {
+    // Always matches.
+    const char *Str = Args.getArgString(Index) + ArgSize;
+    Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
+
+    // Parse out the comma separated values.
+    const char *Prev = Str;
+    for (;; ++Str) {
+      char c = *Str;
+
+      if (!c || c == ',') {
+        if (Prev != Str) {
+          char *Value = new char[Str - Prev + 1];
+          memcpy(Value, Prev, Str - Prev);
+          Value[Str - Prev] = '\0';
+          A->getValues().push_back(Value);
+        }
+
+        if (!c)
+          break;
+
+        Prev = Str + 1;
+      }
+    }
+    A->setOwnsValues(true);
+
+    return A;
+  }
+  case SeparateClass:
+    // Matches iff this is an exact match.
+    // FIXME: Avoid strlen.
+    if (ArgSize != strlen(Args.getArgString(Index)))
+      return 0;
+
+    Index += 2;
+    if (Index > Args.getNumInputArgStrings())
+      return 0;
+
+    return new Arg(UnaliasedOption, Spelling,
+                   Index - 2, Args.getArgString(Index - 1));
+  case MultiArgClass: {
+    // Matches iff this is an exact match.
+    // FIXME: Avoid strlen.
+    if (ArgSize != strlen(Args.getArgString(Index)))
+      return 0;
+
+    Index += 1 + getNumArgs();
+    if (Index > Args.getNumInputArgStrings())
+      return 0;
+
+    Arg *A = new Arg(UnaliasedOption, Spelling, Index - 1 - getNumArgs(),
+                      Args.getArgString(Index - getNumArgs()));
+    for (unsigned i = 1; i != getNumArgs(); ++i)
+      A->getValues().push_back(Args.getArgString(Index - getNumArgs() + i));
+    return A;
+  }
+  case JoinedOrSeparateClass: {
+    // If this is not an exact match, it is a joined arg.
+    // FIXME: Avoid strlen.
+    if (ArgSize != strlen(Args.getArgString(Index))) {
+      const char *Value = Args.getArgString(Index) + ArgSize;
+      return new Arg(*this, Spelling, Index++, Value);
+    }
+
+    // Otherwise it must be separate.
+    Index += 2;
+    if (Index > Args.getNumInputArgStrings())
+      return 0;
+
+    return new Arg(UnaliasedOption, Spelling,
+                   Index - 2, Args.getArgString(Index - 1));
+  }
+  case JoinedAndSeparateClass:
+    // Always matches.
+    Index += 2;
+    if (Index > Args.getNumInputArgStrings())
+      return 0;
+
+    return new Arg(UnaliasedOption, Spelling, Index - 2,
+                   Args.getArgString(Index - 2) + ArgSize,
+                   Args.getArgString(Index - 1));
+  default:
+    llvm_unreachable("Invalid option kind!");
+  }
+}
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 7e8b4a3d0d..17f38918b3 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -19,8 +19,8 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include <limits.h>
 #include <cstring>
+#include <limits.h>
 
 using namespace llvm;
 
@@ -2761,9 +2761,11 @@ APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
   // normalize against the "double" minExponent first, and only *then*
   // truncate the mantissa.  The result of that second conversion
   // may be inexact, but should never underflow.
-  APFloat extended(*this);
+  // Declare fltSemantics before APFloat that uses it (and
+  // saves pointer to it) to ensure correct destruction order.
   fltSemantics extendedSemantics = *semantics;
   extendedSemantics.minExponent = IEEEdouble.minExponent;
+  APFloat extended(*this);
   fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
   assert(fs == opOK && !losesInfo);
   (void)fs;
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 38cfaed9d2..61e503bc3a 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -23,9 +23,9 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cmath>
-#include <limits>
-#include <cstring>
 #include <cstdlib>
+#include <cstring>
+#include <limits>
 using namespace llvm;
 
 /// A utility function for allocating memory, checking for allocation failures,
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index b8978302e7..28f4e64ac7 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -13,9 +13,9 @@
 
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Memory.h"
 #include "llvm/Support/Recycler.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Memory.h"
 #include <cstring>
 
 namespace llvm {
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index fc4f1891d9..53fcf06f82 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -17,20 +17,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/system_error.h"
-#include "llvm/Support/Host.h"
-#include "llvm/Support/Path.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
 #include <cerrno>
 #include <cstdlib>
 using namespace llvm;
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index 508bec4028..411b9c20bf 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -10,11 +10,11 @@
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/ThreadLocal.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <setjmp.h>
 #include <cstdio>
+#include <setjmp.h>
 using namespace llvm;
 
 namespace {
diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp
index 3a38e2a66b..0a02281c25 100644
--- a/lib/Support/DataStream.cpp
+++ b/lib/Support/DataStream.cpp
@@ -15,13 +15,13 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "Data-stream"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/DataStream.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/system_error.h"
-#include <string>
 #include <cerrno>
 #include <cstdio>
+#include <string>
 #if !defined(_MSC_VER) && !defined(__MINGW32__)
 #include <unistd.h>
 #else
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index c8e8900749..0c0f15eddd 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -23,10 +23,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/circular_raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/lib/Support/Disassembler.cpp b/lib/Support/Disassembler.cpp
index c6d73bcad3..b3244fab7d 100644
--- a/lib/Support/Disassembler.cpp
+++ b/lib/Support/Disassembler.cpp
@@ -12,13 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Config/config.h"
 #include "llvm/Support/Disassembler.h"
-
+#include "llvm/Config/config.h"
 #include <cassert>
 #include <iomanip>
-#include <string>
 #include <sstream>
+#include <string>
 
 #if USE_UDIS86
 #include <udis86.h>
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index 5c59a3ef8e..de70b0c00c 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -248,6 +248,14 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
   case DW_AT_APPLE_property_attribute:   return "DW_AT_APPLE_property_attribute";
   case DW_AT_APPLE_property:             return "DW_AT_APPLE_property";
   case DW_AT_APPLE_objc_complete_type:   return "DW_AT_APPLE_objc_complete_type";
+
+    // DWARF5 Fission Extension Attributes
+  case DW_AT_GNU_dwo_name:               return "DW_AT_GNU_dwo_name";
+  case DW_AT_GNU_dwo_id:                 return "DW_AT_GNU_dwo_id";
+  case DW_AT_GNU_ranges_base:            return "DW_AT_GNU_ranges_base";
+  case DW_AT_GNU_addr_base:              return "DW_AT_GNU_addr_base";
+  case DW_AT_GNU_pubnames:               return "DW_AT_GNU_pubnames";
+  case DW_AT_GNU_pubtypes:               return "DW_AT_GNU_pubtypes";
   }
   return 0;
 }
@@ -281,6 +289,10 @@ const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
   case DW_FORM_exprloc:                  return "DW_FORM_exprloc";
   case DW_FORM_flag_present:             return "DW_FORM_flag_present";
   case DW_FORM_ref_sig8:                 return "DW_FORM_ref_sig8";
+
+    // DWARF5 Fission Extension Forms
+  case DW_FORM_GNU_addr_index:           return "DW_FORM_GNU_addr_index";
+  case DW_FORM_GNU_str_index:            return "DW_FORM_GNU_str_index";
   }
   return 0;
 }
@@ -445,6 +457,10 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
   case DW_OP_stack_value:                return "DW_OP_stack_value";
   case DW_OP_lo_user:                    return "DW_OP_lo_user";
   case DW_OP_hi_user:                    return "DW_OP_hi_user";
+
+    // DWARF5 Fission Proposal Op Extensions
+  case DW_OP_GNU_addr_index:             return "DW_OP_GNU_addr_index";
+  case DW_OP_GNU_const_index:            return "DW_OP_GNU_const_index";
   }
   return 0;
 }
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index d8884381ab..b04681d577 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -13,11 +13,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/Mutex.h"
 #include <cstdio>
 #include <cstring>
 
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index e6cc57db82..d4382e54e0 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -12,14 +12,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/Threading.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Config/config.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cstdlib>
 
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index 7dc9587caa..cd430f218b 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -12,37 +12,28 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/FileOutputBuffer.h"
-
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
 
+using llvm::sys::fs::mapped_file_region;
 
 namespace llvm {
-
-
-FileOutputBuffer::FileOutputBuffer(uint8_t *Start, uint8_t *End, 
-                                  StringRef Path, StringRef TmpPath)
-  : BufferStart(Start), BufferEnd(End) {
-  FinalPath.assign(Path);
-  TempPath.assign(TmpPath);
+FileOutputBuffer::FileOutputBuffer(mapped_file_region * R,
+                                   StringRef Path, StringRef TmpPath)
+  : Region(R)
+  , FinalPath(Path)
+  , TempPath(TmpPath) {
 }
 
-
 FileOutputBuffer::~FileOutputBuffer() {
-  // If not already commited, delete buffer and remove temp file.
-  if ( BufferStart != NULL ) {
-    sys::fs::unmap_file_pages((void*)BufferStart, getBufferSize());
-    bool Existed;
-    sys::fs::remove(Twine(TempPath), Existed);
-  }
+  bool Existed;
+  sys::fs::remove(Twine(TempPath), Existed);
 }
 
- 
-error_code FileOutputBuffer::create(StringRef FilePath, 
-                                    size_t Size,  
+error_code FileOutputBuffer::create(StringRef FilePath,
+                                    size_t Size,
                                     OwningPtr<FileOutputBuffer> &Result,
                                     unsigned Flags) {
   // If file already exists, it must be a regular file (to be mappable).
@@ -70,34 +61,27 @@ error_code FileOutputBuffer::create(StringRef FilePath,
   EC = sys::fs::remove(FilePath, Existed);
   if (EC)
     return EC;
-  
+
   // Create new file in same directory but with random name.
   SmallString<128> TempFilePath;
   int FD;
-  EC = sys::fs::unique_file(Twine(FilePath) + ".tmp%%%%%%%",  
-                                                FD, TempFilePath, false, 0644);
+  EC = sys::fs::unique_file(Twine(FilePath) + ".tmp%%%%%%%",
+                            FD, TempFilePath, false, 0644);
   if (EC)
     return EC;
-  
-  // The unique_file() interface leaks lower layers and returns a file 
-  // descriptor.  There is no way to directly close it, so use this hack
-  // to hand it off to raw_fd_ostream to close for us.
-  {
-    raw_fd_ostream Dummy(FD, /*shouldClose=*/true);
-  }
-  
-  // Resize file to requested initial size
-  EC = sys::fs::resize_file(Twine(TempFilePath), Size);
+
+  OwningPtr<mapped_file_region> MappedFile(
+    new mapped_file_region(FD, mapped_file_region::readwrite, Size, 0, EC));
   if (EC)
     return EC;
-  
+
   // If requested, make the output file executable.
   if ( Flags & F_executable ) {
     sys::fs::file_status Stat2;
     EC = sys::fs::status(Twine(TempFilePath), Stat2);
     if (EC)
       return EC;
-    
+
     sys::fs::perms new_perms = Stat2.permissions();
     if ( new_perms & sys::fs::owner_read )
       new_perms |= sys::fs::owner_exe;
@@ -111,38 +95,25 @@ error_code FileOutputBuffer::create(StringRef FilePath,
       return EC;
   }
 
-  // Memory map new file.
-  void *Base;
-  EC = sys::fs::map_file_pages(Twine(TempFilePath), 0, Size, true, Base);
-  if (EC)
-    return EC;
-  
-  // Create FileOutputBuffer object to own mapped range.
-  uint8_t *Start = reinterpret_cast<uint8_t*>(Base);
-  Result.reset(new FileOutputBuffer(Start, Start+Size, FilePath, TempFilePath));
-                     
-  return error_code::success();
-}                    
+  Result.reset(new FileOutputBuffer(MappedFile.get(), FilePath, TempFilePath));
+  if (Result)
+    MappedFile.take();
 
+  return error_code::success();
+}
 
 error_code FileOutputBuffer::commit(int64_t NewSmallerSize) {
   // Unmap buffer, letting OS flush dirty pages to file on disk.
-  void *Start = reinterpret_cast<void*>(BufferStart);
-  error_code EC = sys::fs::unmap_file_pages(Start, getBufferSize());
-  if (EC)
-    return EC;
-  
+  Region.reset(0);
+
   // If requested, resize file as part of commit.
   if ( NewSmallerSize != -1 ) {
-    EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize);
+    error_code EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize);
     if (EC)
       return EC;
   }
-  
+
   // Rename file to final name.
   return sys::fs::rename(Twine(TempPath), Twine(FinalPath));
 }
-
-
 } // namespace
-
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
index f9e9cf0366..fc8a2f31bd 100644
--- a/lib/Support/FileUtilities.cpp
+++ b/lib/Support/FileUtilities.cpp
@@ -13,15 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/FileUtilities.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallString.h"
+#include <cctype>
 #include <cstdlib>
 #include <cstring>
-#include <cctype>
 using namespace llvm;
 
 static bool isSignedChar(char C) {
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 4672554022..9c44c9348b 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -16,8 +16,8 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Host.h"
+#include "llvm/Support/MathExtras.h"
 #include <cassert>
 #include <cstring>
 using namespace llvm;
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index f6aaf83811..669c238da9 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/GraphWriter.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
-#include "llvm/Config/config.h"
 using namespace llvm;
 
 static cl::opt<bool> ViewBackground("view-background", cl::Hidden,
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 34e32b817b..35bfb49a1f 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -11,14 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/Host.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Config/config.h"
 #include "llvm/Support/DataStream.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Host.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Config/config.h"
 #include <string.h>
 
 // Include the platform-specific parts of this class.
diff --git a/lib/Support/LocaleWindows.inc b/lib/Support/LocaleWindows.inc
index 6827ac15a1..28e429c0cb 100644
--- a/lib/Support/LocaleWindows.inc
+++ b/lib/Support/LocaleWindows.inc
@@ -12,4 +12,4 @@ bool isPrint(int c) {
 
 }
 }
-}
-\ No newline at end of file
+}
diff --git a/lib/Support/LocaleXlocale.inc b/lib/Support/LocaleXlocale.inc
index f595e7c582..389fe3d1d4 100644
--- a/lib/Support/LocaleXlocale.inc
+++ b/lib/Support/LocaleXlocale.inc
@@ -1,5 +1,5 @@
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/ManagedStatic.h"
 #include <cassert>
 #include <xlocale.h>
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index 7610d281f0..dc28a72421 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -10,8 +10,8 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include <fstream>
-#include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/types.h>
 #if LLVM_ON_WIN32
 #include <windows.h>
 #endif
diff --git a/lib/Support/Memory.cpp b/lib/Support/Memory.cpp
index 12f083822f..f9a4903ad0 100644
--- a/lib/Support/Memory.cpp
+++ b/lib/Support/Memory.cpp
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Memory.h"
-#include "llvm/Support/Valgrind.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/Valgrind.h"
 
 // Include the platform-specific parts of this class.
 #ifdef LLVM_ON_UNIX
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 0423c7acb3..79e1994991 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -15,20 +15,20 @@
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Config/config.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Errno.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/system_error.h"
 #include <cassert>
+#include <cerrno>
 #include <cstdio>
 #include <cstring>
-#include <cerrno>
 #include <new>
-#include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/types.h>
 #if !defined(_MSC_VER) && !defined(__MINGW32__)
 #include <unistd.h>
 #else
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index db4a56b692..c67af1b216 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Path.h"
-#include "llvm/Support/FileSystem.h"
 #include "llvm/Config/config.h"
-#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FileSystem.h"
 #include <cassert>
 #include <cstring>
 #include <ostream>
diff --git a/lib/Support/PathV2.cpp b/lib/Support/PathV2.cpp
index 46571c049f..98d7382b4e 100644
--- a/lib/Support/PathV2.cpp
+++ b/lib/Support/PathV2.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/PathV2.h"
-#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
 #include <cctype>
 #include <cstdio>
 #include <cstring>
diff --git a/lib/Support/PluginLoader.cpp b/lib/Support/PluginLoader.cpp
index 2924cfa388..358137f08f 100644
--- a/lib/Support/PluginLoader.cpp
+++ b/lib/Support/PluginLoader.cpp
@@ -12,11 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DONT_GET_PLUGIN_LOADER_OPTION
-#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PluginLoader.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
 #include <vector>
 using namespace llvm;
 
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index ef3307317c..21d56adb5e 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -12,12 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Config/config.h"     // Get autoconf configuration settings
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h"     // Get autoconf configuration settings
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/ThreadLocal.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
 
 #ifdef HAVE_CRASHREPORTERCLIENT_H
 #include <CrashReporterClient.h>
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index d293da07d6..efc8b90a00 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Regex.h"
+#include "regex_impl.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallVector.h"
-#include "regex_impl.h"
 #include <string>
 using namespace llvm;
 
@@ -27,7 +27,9 @@ Regex::Regex(StringRef regex, unsigned Flags) {
     flags |= REG_ICASE;
   if (Flags & Newline)
     flags |= REG_NEWLINE;
-  error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND);
+  if (!(Flags & BasicRegex))
+    flags |= REG_EXTENDED;
+  error = llvm_regcomp(preg, regex.data(), flags|REG_PEND);
 }
 
 Regex::~Regex() {
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index e4e01be038..6580137569 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -13,10 +13,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/Twine.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
 using namespace llvm;
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index d8a6ad35ba..3a6522101d 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -22,13 +22,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Mutex.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cstring>
 using namespace llvm;
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index f8e9208462..d7a0bfa410 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -9,10 +9,9 @@
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/APInt.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/edit_distance.h"
-
 #include <bitset>
 
 using namespace llvm;
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
index 7483225fdf..13fba2ea25 100644
--- a/lib/Support/Threading.cpp
+++ b/lib/Support/Threading.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Threading.h"
+#include "llvm/Config/config.h"
 #include "llvm/Support/Atomic.h"
 #include "llvm/Support/Mutex.h"
-#include "llvm/Config/config.h"
 #include <cassert>
 
 using namespace llvm;
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 598e8ad6a1..896d869aa1 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -12,15 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Timer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Format.h"
+#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Process.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 // CreateInfoOutputFile - Return a file stream to print our output on.
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 52bf722d53..eefb96bfee 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/Triple.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include <cstring>
 using namespace llvm;
@@ -125,7 +125,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
   case Haiku: return "haiku";
   case Minix: return "minix";
   case RTEMS: return "rtems";
-  case NativeClient: return "nacl";
+  case NaCl: return "nacl";
   case CNK: return "cnk";
   case Bitrig: return "bitrig";
   case AIX: return "aix";
@@ -272,7 +272,7 @@ static Triple::OSType parseOS(StringRef OSName) {
     .StartsWith("haiku", Triple::Haiku)
     .StartsWith("minix", Triple::Minix)
     .StartsWith("rtems", Triple::RTEMS)
-    .StartsWith("nacl", Triple::NativeClient)
+    .StartsWith("nacl", Triple::NaCl)
     .StartsWith("cnk", Triple::CNK)
     .StartsWith("bitrig", Triple::Bitrig)
     .StartsWith("aix", Triple::AIX)
diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc
index 35e5120369..820041bccc 100644
--- a/lib/Support/Unix/PathV2.inc
+++ b/lib/Support/Unix/PathV2.inc
@@ -587,7 +587,7 @@ mapped_file_region::~mapped_file_region() {
     ::munmap(Mapping, Size);
 }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
 mapped_file_region::mapped_file_region(mapped_file_region &&other)
   : Mode(other.Mode), Size(other.Size), Mapping(other.Mapping) {
   other.Mapping = 0;
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 049c41b742..9c34f2ba1a 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -16,9 +16,9 @@
 //===          is guaranteed to work on *all* UNIX variants.
 //===----------------------------------------------------------------------===//
 
-#include <llvm/Config/config.h>
-#include "llvm/Support/FileSystem.h"
 #include "Unix.h"
+#include "llvm/Support/FileSystem.h"
+#include <llvm/Config/config.h>
 #if HAVE_SYS_STAT_H
 #include <sys/stat.h>
 #endif
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index 264fa5dbde..7aa43f6dce 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -15,9 +15,9 @@
 #include "Unix.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Mutex.h"
+#include <algorithm>
 #include <string>
 #include <vector>
-#include <algorithm>
 #if HAVE_EXECINFO_H
 # include <execinfo.h>         // For backtrace().
 #endif
diff --git a/lib/Support/Unix/Unix.h b/lib/Support/Unix/Unix.h
index 361f297d36..051f56f969 100644
--- a/lib/Support/Unix/Unix.h
+++ b/lib/Support/Unix/Unix.h
@@ -21,12 +21,12 @@
 
 #include "llvm/Config/config.h"     // Get autoconf configuration settings
 #include "llvm/Support/Errno.h"
-#include <cstdlib>
+#include <algorithm>
+#include <cerrno>
 #include <cstdio>
+#include <cstdlib>
 #include <cstring>
-#include <cerrno>
 #include <string>
-#include <algorithm>
 
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
diff --git a/lib/Support/Windows/Memory.inc b/lib/Support/Windows/Memory.inc
index cb80f2817c..4c5aebd5e7 100644
--- a/lib/Support/Windows/Memory.inc
+++ b/lib/Support/Windows/Memory.inc
@@ -15,6 +15,8 @@
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Process.h"
+
+// The Windows.h header must be the last one included.
 #include "Windows.h"
 
 namespace {
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index 2280b34171..98d8a1850b 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -17,8 +17,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "Windows.h"
-#include <malloc.h>
 #include <cstdio>
+#include <malloc.h>
 
 // We need to undo a macro defined in Windows.h, otherwise we won't compile:
 #undef CopyFile
diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc
index 3dfac66b77..2e6cc96e7f 100644
--- a/lib/Support/Windows/PathV2.inc
+++ b/lib/Support/Windows/PathV2.inc
@@ -328,7 +328,7 @@ error_code resize_file(const Twine &path, uint64_t size) {
                                   path_utf16))
     return ec;
 
-  int fd = ::_wopen(path_utf16.begin(), O_BINARY, S_IREAD | S_IWRITE);
+  int fd = ::_wopen(path_utf16.begin(), O_BINARY | _O_RDWR, S_IWRITE);
   if (fd == -1)
     return error_code(errno, generic_category());
 #ifdef HAVE__CHSIZE_S
@@ -861,7 +861,7 @@ mapped_file_region::~mapped_file_region() {
     ::CloseHandle(FileHandle);
 }
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
 mapped_file_region::mapped_file_region(mapped_file_region &&other)
   : Mode(other.Mode)
   , Size(other.Size)
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index e29eb6dff6..89dbf2378d 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "Windows.h"
-#include <psapi.h>
-#include <malloc.h>
-#include <io.h>
 #include <direct.h>
+#include <io.h>
+#include <malloc.h>
+#include <psapi.h>
 
 #ifdef __MINGW32__
  #if (HAVE_LIBPSAPI != 1)
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index 80ccaa6ea6..b54608093b 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -13,9 +13,9 @@
 
 #include "Windows.h"
 #include <cstdio>
-#include <malloc.h>
-#include <io.h>
 #include <fcntl.h>
+#include <io.h>
+#include <malloc.h>
 
 //===----------------------------------------------------------------------===//
 //=== WARNING: Implementation here must contain only Win32 specific code
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index 38308f6abd..a969753217 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "Windows.h"
+#include <algorithm>
 #include <stdio.h>
 #include <vector>
-#include <algorithm>
 
 #ifdef __MINGW32__
  #include <imagehlp.h>
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 2ae33f5b99..2cead20c0b 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -12,16 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/YAMLParser.h"
-
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/ilist_node.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 using namespace yaml;
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 7cd53648da..106864dd05 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -12,16 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/Program.h"
-#include "llvm/Support/Process.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/Program.h"
 #include "llvm/Support/system_error.h"
-#include "llvm/ADT/STLExtras.h"
 #include <cctype>
 #include <cerrno>
 #include <sys/stat.h>
diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c
index 46c91a9c49..74d9186aaa 100644
--- a/lib/Support/regcomp.c
+++ b/lib/Support/regcomp.c
@@ -303,6 +303,7 @@ p_ere_exp(struct parse *p)
 	sopno pos;
 	int count;
 	int count2;
+	int backrefnum;
 	sopno subno;
 	int wascaret = 0;
 
@@ -370,7 +371,34 @@ p_ere_exp(struct parse *p)
 	case '\\':
 		REQUIRE(MORE(), REG_EESCAPE);
 		c = GETNEXT();
-		ordinary(p, c);
+		if (c >= '1' && c <= '9') {
+			/* \[0-9] is taken to be a back-reference to a previously specified
+			 * matching group. backrefnum will hold the number. The matching
+			 * group must exist (i.e. if \4 is found there must have been at
+			 * least 4 matching groups specified in the pattern previously).
+			 */
+			backrefnum = c - '0';
+			if (p->pend[backrefnum] == 0) {
+				SETERROR(REG_ESUBREG);
+				break;
+			}
+
+			/* Make sure everything checks out and emit the sequence
+			 * that marks a back-reference to the parse structure.
+			 */
+			assert(backrefnum <= p->g->nsub);
+			EMIT(OBACK_, backrefnum);
+			assert(p->pbegin[backrefnum] != 0);
+			assert(OP(p->strip[p->pbegin[backrefnum]]) != OLPAREN);
+			assert(OP(p->strip[p->pend[backrefnum]]) != ORPAREN);
+			(void) dupl(p, p->pbegin[backrefnum]+1, p->pend[backrefnum]);
+			EMIT(O_BACK, backrefnum);
+			p->g->backrefs = 1;
+		} else {
+			/* Other chars are simply themselves when escaped with a backslash.
+			 */
+			ordinary(p, c);
+		}
 		break;
 	case '{':		/* okay as ordinary except if digit follows */
 		REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
diff --git a/lib/Support/system_error.cpp b/lib/Support/system_error.cpp
index 2df223ca71..b22745afc3 100644
--- a/lib/Support/system_error.cpp
+++ b/lib/Support/system_error.cpp
@@ -13,8 +13,8 @@
 
 #include "llvm/Support/system_error.h"
 #include "llvm/Support/Errno.h"
-#include <string>
 #include <cstring>
+#include <string>
 
 namespace llvm {
 
diff --git a/lib/TableGen/Error.cpp b/lib/TableGen/Error.cpp
index 0bb86b0686..ec84a72454 100644
--- a/lib/TableGen/Error.cpp
+++ b/lib/TableGen/Error.cpp
@@ -15,7 +15,6 @@
 #include "llvm/TableGen/Error.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/raw_ostream.h"
-
 #include <cstdlib>
 
 namespace llvm {
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index 11feb43542..9b65adcf85 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -12,17 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/TableGen/Record.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/TableGen/Error.h"
 
 using namespace llvm;
 
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index ff322e74fb..d733f142aa 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -12,18 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "TGLexer.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Config/config.h" // for strtoull()/strtoll() define
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/TableGen/Error.h"
 #include <cctype>
+#include <cerrno>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
-#include <cerrno>
-
-#include "llvm/Config/config.h" // for strtoull()/strtoll() define
 
 using namespace llvm;
 
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
index 8a850b5cec..e2e116bb82 100644
--- a/lib/TableGen/TGLexer.h
+++ b/lib/TableGen/TGLexer.h
@@ -15,9 +15,9 @@
 #define TGLEXER_H
 
 #include "llvm/Support/DataTypes.h"
+#include <cassert>
 #include <string>
 #include <vector>
-#include <cassert>
 
 namespace llvm {
 class MemoryBuffer;
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index b1f9f724ef..17f0abc974 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -12,12 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "TGParser.h"
-#include "llvm/TableGen/Record.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TableGen/Record.h"
 #include <algorithm>
 #include <sstream>
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h
index 9c2ad43c42..0ea962b995 100644
--- a/lib/TableGen/TGParser.h
+++ b/lib/TableGen/TGParser.h
@@ -14,11 +14,11 @@
 #ifndef TGPARSER_H
 #define TGPARSER_H
 
-#include "llvm/TableGen/Record.h"
 #include "TGLexer.h"
-#include "llvm/TableGen/Error.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
 #include <map>
 
 namespace llvm {
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 23974ad905..5ea251a795 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -132,6 +132,11 @@ def HasV7Ops    : SubtargetFeature<"v7", "HasV7Ops", "true",
 include "ARMSchedule.td"
 
 // ARM processor families.
+def ProcA5      : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
+                                   "Cortex-A5 ARM processors",
+                                   [FeatureSlowFPBrcc, FeatureNEONForFP,
+                                    FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
+                                    FeatureT2XtPk]>;
 def ProcA8      : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
                                    "Cortex-A8 ARM processors",
                                    [FeatureSlowFPBrcc, FeatureNEONForFP,
@@ -219,6 +224,11 @@ def : Processor<"arm1156t2f-s",     ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
                                                        FeatureDSPThumb2]>;
 
 // V7a Processors.
+// FIXME: A5 has currently the same Schedule model as A8
+def : ProcessorModel<"cortex-a5",   CortexA8Model,
+                                    [ProcA5, HasV7Ops, FeatureNEON, FeatureDB,
+                                     FeatureVFP4, FeatureDSPThumb2,
+                                     FeatureHasRAS]>;
 def : ProcessorModel<"cortex-a8",   CortexA8Model,
                                     [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
                                      FeatureDSPThumb2, FeatureHasRAS]>;
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 6bad258bcb..3d59374aee 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -25,31 +25,31 @@
 #include "MCTargetDesc/ARMMCExpr.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
 #include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DebugInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstBuilder.h"
-#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Type.h"
 #include <cctype>
 using namespace llvm;
 
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 323b022ad4..2e90866f27 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -18,9 +18,7 @@
 #include "ARMHazardRecognizer.h"
 #include "ARMMachineFunctionInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -29,12 +27,14 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/STLExtras.h"
 
 #define GET_INSTRINFO_CTOR
 #include "ARMGenInstrInfo.inc"
@@ -106,7 +106,7 @@ CreateTargetHazardRecognizer(const TargetMachine *TM,
     const InstrItineraryData *II = TM->getInstrItineraryData();
     return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
   }
-  return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+  return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
 }
 
 ScheduleHazardRecognizer *ARMBaseInstrInfo::
@@ -115,7 +115,7 @@ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
   if (Subtarget.isThumb2() || Subtarget.hasVFP2())
     return (ScheduleHazardRecognizer *)
       new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
-  return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
+  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
 }
 
 MachineInstr *
@@ -1269,7 +1269,7 @@ reMaterialize(MachineBasicBlock &MBB,
 
 MachineInstr *
 ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
-  MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF);
+  MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
   switch(Orig->getOpcode()) {
   case ARM::tLDRpci_pic:
   case ARM::t2LDRpci_pic: {
@@ -1604,7 +1604,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
     // MOVCC AL can't be inverted. Shouldn't happen.
     if (CC == ARMCC::AL || PredReg != ARM::CPSR)
       return NULL;
-    MI = TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+    MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
     if (!MI)
       return NULL;
     // After swapping the MOVCC operands, also invert the condition.
@@ -1613,7 +1613,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
     return MI;
   }
   }
-  return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+  return TargetInstrInfo::commuteInstruction(MI, NewMI);
 }
 
 /// Identify instructions that can be folded into a MOVCC instruction, and
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 6f38e35124..2698132d2d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -15,10 +15,10 @@
 #define ARMBASEINSTRUCTIONINFO_H
 
 #include "ARM.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 #define GET_INSTRINFO_HEADER
 #include "ARMGenInstrInfo.inc"
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 8f5be6a120..e814fda1f8 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -19,25 +19,26 @@
 #include "ARMSubtarget.h"
 #include "ARMTargetMachine.h"  // @LOCALMOD
 #include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CommandLine.h"
 
 #define GET_REGINFO_TARGET_DESC
 #include "ARMGenRegisterInfo.inc"
@@ -177,154 +178,62 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
   }
 }
 
-/// getRawAllocationOrder - Returns the register allocation order for a
-/// specified register class with a target-dependent hint.
-ArrayRef<uint16_t>
-ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
-                                           unsigned HintType, unsigned HintReg,
-                                           const MachineFunction &MF) const {
-  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-  // Alternative register allocation orders when favoring even / odd registers
-  // of register pairs.
-
-  // No FP, R9 is available.
-  static const uint16_t GPREven1[] = {
-    ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
-    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
-    ARM::R9, ARM::R11
-  };
-  static const uint16_t GPROdd1[] = {
-    ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
-    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
-    ARM::R8, ARM::R10
-  };
-
-  // FP is R7, R9 is available.
-  static const uint16_t GPREven2[] = {
-    ARM::R0, ARM::R2, ARM::R4,          ARM::R8, ARM::R10,
-    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
-    ARM::R9, ARM::R11
-  };
-  static const uint16_t GPROdd2[] = {
-    ARM::R1, ARM::R3, ARM::R5,          ARM::R9, ARM::R11,
-    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
-    ARM::R8, ARM::R10
-  };
-
-  // FP is R11, R9 is available.
-  static const uint16_t GPREven3[] = {
-    ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
-    ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
-    ARM::R9
-  };
-  static const uint16_t GPROdd3[] = {
-    ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
-    ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
-    ARM::R8
-  };
-
-  // No FP, R9 is not available.
-  static const uint16_t GPREven4[] = {
-    ARM::R0, ARM::R2, ARM::R4, ARM::R6,          ARM::R10,
-    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
-    ARM::R11
-  };
-  static const uint16_t GPROdd4[] = {
-    ARM::R1, ARM::R3, ARM::R5, ARM::R7,          ARM::R11,
-    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
-    ARM::R10
-  };
-
-  // FP is R7, R9 is not available.
-  static const uint16_t GPREven5[] = {
-    ARM::R0, ARM::R2, ARM::R4,                   ARM::R10,
-    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
-    ARM::R11
-  };
-  static const uint16_t GPROdd5[] = {
-    ARM::R1, ARM::R3, ARM::R5,                   ARM::R11,
-    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
-    ARM::R10
-  };
-
-  // FP is R11, R9 is not available.
-  static const uint16_t GPREven6[] = {
-    ARM::R0, ARM::R2, ARM::R4, ARM::R6,
-    ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
-  };
-  static const uint16_t GPROdd6[] = {
-    ARM::R1, ARM::R3, ARM::R5, ARM::R7,
-    ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
-  };
-
-  // We only support even/odd hints for GPR and rGPR.
-  if (RC != &ARM::GPRRegClass && RC != &ARM::rGPRRegClass)
-    return RC->getRawAllocationOrder(MF);
-
-  if (HintType == ARMRI::RegPairEven) {
-    if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
-      // It's no longer possible to fulfill this hint. Return the default
-      // allocation order.
-      return RC->getRawAllocationOrder(MF);
-
-    if (!TFI->hasFP(MF)) {
-      if (!STI.isR9Reserved())
-        return makeArrayRef(GPREven1);
-      else
-        return makeArrayRef(GPREven4);
-    } else if (FramePtr == ARM::R7) {
-      if (!STI.isR9Reserved())
-        return makeArrayRef(GPREven2);
-      else
-        return makeArrayRef(GPREven5);
-    } else { // FramePtr == ARM::R11
-      if (!STI.isR9Reserved())
-        return makeArrayRef(GPREven3);
-      else
-        return makeArrayRef(GPREven6);
-    }
-  } else if (HintType == ARMRI::RegPairOdd) {
-    if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
-      // It's no longer possible to fulfill this hint. Return the default
-      // allocation order.
-      return RC->getRawAllocationOrder(MF);
-
-    if (!TFI->hasFP(MF)) {
-      if (!STI.isR9Reserved())
-        return makeArrayRef(GPROdd1);
-      else
-        return makeArrayRef(GPROdd4);
-    } else if (FramePtr == ARM::R7) {
-      if (!STI.isR9Reserved())
-        return makeArrayRef(GPROdd2);
-      else
-        return makeArrayRef(GPROdd5);
-    } else { // FramePtr == ARM::R11
-      if (!STI.isR9Reserved())
-        return makeArrayRef(GPROdd3);
-      else
-        return makeArrayRef(GPROdd6);
-    }
-  }
-  return RC->getRawAllocationOrder(MF);
+// Get the other register in a GPRPair.
+static unsigned getPairedGPR(unsigned Reg, bool Odd, const MCRegisterInfo *RI) {
+  for (MCSuperRegIterator Supers(Reg, RI); Supers.isValid(); ++Supers)
+    if (ARM::GPRPairRegClass.contains(*Supers))
+      return RI->getSubReg(*Supers, Odd ? ARM::gsub_1 : ARM::gsub_0);
+  return 0;
 }
 
-/// ResolveRegAllocHint - Resolves the specified register allocation hint
-/// to a physical register. Returns the physical register if it is successful.
-unsigned
-ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
-                                         const MachineFunction &MF) const {
-  if (Reg == 0 || !isPhysicalRegister(Reg))
-    return 0;
-  if (Type == 0)
-    return Reg;
-  else if (Type == (unsigned)ARMRI::RegPairOdd)
-    // Odd register.
-    return getRegisterPairOdd(Reg, MF);
-  else if (Type == (unsigned)ARMRI::RegPairEven)
-    // Even register.
-    return getRegisterPairEven(Reg, MF);
-  return 0;
+// Resolve the RegPairEven / RegPairOdd register allocator hints.
+void
+ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
+                                           ArrayRef<MCPhysReg> Order,
+                                           SmallVectorImpl<MCPhysReg> &Hints,
+                                           const MachineFunction &MF,
+                                           const VirtRegMap *VRM) const {
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg);
+
+  unsigned Odd;
+  switch (Hint.first) {
+  case ARMRI::RegPairEven:
+    Odd = 0;
+    break;
+  case ARMRI::RegPairOdd:
+    Odd = 1;
+    break;
+  default:
+    TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM);
+    return;
+  }
+
+  // This register should preferably be even (Odd == 0) or odd (Odd == 1).
+  // Check if the other part of the pair has already been assigned, and provide
+  // the paired register as the first hint.
+  unsigned PairedPhys = 0;
+  if (VRM && VRM->hasPhys(Hint.second)) {
+    PairedPhys = getPairedGPR(VRM->getPhys(Hint.second), Odd, this);
+    if (PairedPhys && MRI.isReserved(PairedPhys))
+      PairedPhys = 0;
+  }
+
+  // First prefer the paired physreg.
+  if (PairedPhys)
+    Hints.push_back(PairedPhys);
+
+  // Then prefer even or odd registers.
+  for (unsigned I = 0, E = Order.size(); I != E; ++I) {
+    unsigned Reg = Order[I];
+    if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd)
+      continue;
+    // Don't provide hints that are paired to a reserved register.
+    unsigned Paired = getPairedGPR(Reg, !Odd, this);
+    if (!Paired || MRI.isReserved(Paired))
+      continue;
+    Hints.push_back(Reg);
+  }
 }
 
 void
@@ -468,114 +377,6 @@ unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
   llvm_unreachable("What is the exception handler register");
 }
 
-unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
-                                              const MachineFunction &MF) const {
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-  switch (Reg) {
-  default: break;
-  // Return 0 if either register of the pair is a special register.
-  // So no R12, etc.
-  case ARM::R1: return ARM::R0;
-  case ARM::R3: return ARM::R2;
-  case ARM::R5: return ARM::R4;
-  case ARM::R7:
-    return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6))
-      ? 0 : ARM::R6;
-  case ARM::R9: return MRI.isReserved(ARM::R9)  ? 0 :ARM::R8;
-  case ARM::R11: return MRI.isReserved(ARM::R11) ? 0 : ARM::R10;
-
-  case ARM::S1: return ARM::S0;
-  case ARM::S3: return ARM::S2;
-  case ARM::S5: return ARM::S4;
-  case ARM::S7: return ARM::S6;
-  case ARM::S9: return ARM::S8;
-  case ARM::S11: return ARM::S10;
-  case ARM::S13: return ARM::S12;
-  case ARM::S15: return ARM::S14;
-  case ARM::S17: return ARM::S16;
-  case ARM::S19: return ARM::S18;
-  case ARM::S21: return ARM::S20;
-  case ARM::S23: return ARM::S22;
-  case ARM::S25: return ARM::S24;
-  case ARM::S27: return ARM::S26;
-  case ARM::S29: return ARM::S28;
-  case ARM::S31: return ARM::S30;
-
-  case ARM::D1: return ARM::D0;
-  case ARM::D3: return ARM::D2;
-  case ARM::D5: return ARM::D4;
-  case ARM::D7: return ARM::D6;
-  case ARM::D9: return ARM::D8;
-  case ARM::D11: return ARM::D10;
-  case ARM::D13: return ARM::D12;
-  case ARM::D15: return ARM::D14;
-  case ARM::D17: return ARM::D16;
-  case ARM::D19: return ARM::D18;
-  case ARM::D21: return ARM::D20;
-  case ARM::D23: return ARM::D22;
-  case ARM::D25: return ARM::D24;
-  case ARM::D27: return ARM::D26;
-  case ARM::D29: return ARM::D28;
-  case ARM::D31: return ARM::D30;
-  }
-
-  return 0;
-}
-
-unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
-                                             const MachineFunction &MF) const {
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-  switch (Reg) {
-  default: break;
-  // Return 0 if either register of the pair is a special register.
-  // So no R12, etc.
-  case ARM::R0: return ARM::R1;
-  case ARM::R2: return ARM::R3;
-  case ARM::R4: return ARM::R5;
-  case ARM::R6:
-    return (MRI.isReserved(ARM::R7) || MRI.isReserved(ARM::R6))
-      ? 0 : ARM::R7;
-  case ARM::R8: return MRI.isReserved(ARM::R9)  ? 0 :ARM::R9;
-  case ARM::R10: return MRI.isReserved(ARM::R11) ? 0 : ARM::R11;
-
-  case ARM::S0: return ARM::S1;
-  case ARM::S2: return ARM::S3;
-  case ARM::S4: return ARM::S5;
-  case ARM::S6: return ARM::S7;
-  case ARM::S8: return ARM::S9;
-  case ARM::S10: return ARM::S11;
-  case ARM::S12: return ARM::S13;
-  case ARM::S14: return ARM::S15;
-  case ARM::S16: return ARM::S17;
-  case ARM::S18: return ARM::S19;
-  case ARM::S20: return ARM::S21;
-  case ARM::S22: return ARM::S23;
-  case ARM::S24: return ARM::S25;
-  case ARM::S26: return ARM::S27;
-  case ARM::S28: return ARM::S29;
-  case ARM::S30: return ARM::S31;
-
-  case ARM::D0: return ARM::D1;
-  case ARM::D2: return ARM::D3;
-  case ARM::D4: return ARM::D5;
-  case ARM::D6: return ARM::D7;
-  case ARM::D8: return ARM::D9;
-  case ARM::D10: return ARM::D11;
-  case ARM::D12: return ARM::D13;
-  case ARM::D14: return ARM::D15;
-  case ARM::D16: return ARM::D17;
-  case ARM::D18: return ARM::D19;
-  case ARM::D20: return ARM::D21;
-  case ARM::D22: return ARM::D23;
-  case ARM::D24: return ARM::D25;
-  case ARM::D26: return ARM::D27;
-  case ARM::D28: return ARM::D29;
-  case ARM::D30: return ARM::D31;
-  }
-
-  return 0;
-}
-
 /// emitLoadConstPool - Emits a load from constpool to materialize the
 /// specified immediate.
 void ARMBaseRegisterInfo::
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index e2bdd046db..aaa56a9c14 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -111,12 +111,11 @@ public:
   unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                                MachineFunction &MF) const;
 
-  ArrayRef<uint16_t> getRawAllocationOrder(const TargetRegisterClass *RC,
-                                           unsigned HintType, unsigned HintReg,
-                                           const MachineFunction &MF) const;
-
-  unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
-                               const MachineFunction &MF) const;
+  void getRegAllocationHints(unsigned VirtReg,
+                             ArrayRef<MCPhysReg> Order,
+                             SmallVectorImpl<MCPhysReg> &Hints,
+                             const MachineFunction &MF,
+                             const VirtRegMap *VRM) const;
 
   void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
                           MachineFunction &MF) const;
@@ -175,11 +174,6 @@ public:
 
   virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
                                    int SPAdj, RegScavenger *RS = NULL) const;
-
-private:
-  unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
-
-  unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 6adbf4f27e..4b6a768e89 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -14,16 +14,13 @@
 
 #define DEBUG_TYPE "jit"
 #include "ARM.h"
-#include "ARMConstantPoolValue.h"
 #include "ARMBaseInstrInfo.h"
+#include "ARMConstantPoolValue.h"
 #include "ARMRelocations.h"
 #include "ARMSubtarget.h"
 #include "ARMTargetMachine.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -31,7 +28,10 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index be19a20182..54ccbdddaa 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -16,23 +16,23 @@
 #define DEBUG_TYPE "arm-cp-islands"
 #include "ARM.h"
 #include "ARMMachineFunctionInfo.h"
-#include "Thumb2InstrInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetMachine.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index fa3226e37e..1820b323f8 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -13,12 +13,12 @@
 
 #include "ARMConstantPoolValue.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/Constant.h"
 #include "llvm/Constants.h"
 #include "llvm/GlobalValue.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Type.h"
 #include <cstdlib>
 using namespace llvm;
 
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 348f234f5c..0fa3fe9bc8 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -23,11 +23,11 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetOptions.h" // @LOCALMOD for llvm::TLSUseCall
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 static cl::opt<bool>
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 033540ae7d..9cacf1b000 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -16,31 +16,31 @@
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMCallingConv.h"
-#include "ARMTargetMachine.h"
-#include "ARMSubtarget.h"
 #include "ARMConstantPoolValue.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
-#include "llvm/Operator.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
@@ -186,7 +186,8 @@ class ARMFastISel : public FastISel {
     bool ARMComputeAddress(const Value *Obj, Address &Addr);
     void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3);
     bool ARMIsMemCpySmall(uint64_t Len);
-    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len);
+    bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
+                               unsigned Alignment);
     unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt);
     unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
     unsigned ARMMaterializeInt(const Constant *C, EVT VT);
@@ -563,7 +564,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
   const ConstantInt *CI = cast<ConstantInt>(C);
   if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
     unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
-    unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+    const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
+      &ARM::GPRRegClass;
+    unsigned ImmReg = createResultReg(RC);
     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             TII.get(Opc), ImmReg)
                     .addImm(CI->getZExtValue()));
@@ -1670,7 +1673,6 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
 
   // Things need to be register sized for register moves.
   if (VT != MVT::i32) return false;
-  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
 
   unsigned CondReg = getRegForValue(I->getOperand(0));
   if (CondReg == 0) return false;
@@ -1703,14 +1705,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) {
                   .addReg(CondReg).addImm(0));
 
   unsigned MovCCOpc;
+  const TargetRegisterClass *RC;
   if (!UseImm) {
+    RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
     MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
   } else {
-    if (!isNegativeImm) {
+    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
+    if (!isNegativeImm)
       MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
-    } else {
+    else
       MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
-    }
   }
   unsigned ResultReg = createResultReg(RC);
   if (!UseImm)
@@ -2285,6 +2289,9 @@ bool ARMFastISel::SelectCall(const Instruction *I,
   // Can't handle inline asm.
   if (isa<InlineAsm>(Callee)) return false;
 
+  // Allow SelectionDAG isel to handle tail calls.
+  if (CI->isTailCall()) return false;
+
   // Check the calling convention.
   ImmutableCallSite CS(CI);
   CallingConv::ID CC = CS.getCallingConv();
@@ -2424,21 +2431,30 @@ bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
 }
 
 bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
-                                        uint64_t Len) {
+                                        uint64_t Len, unsigned Alignment) {
   // Make sure we don't bloat code by inlining very large memcpy's.
   if (!ARMIsMemCpySmall(Len))
     return false;
 
-  // We don't care about alignment here since we just emit integer accesses.
   while (Len) {
     MVT VT;
-    if (Len >= 4)
-      VT = MVT::i32;
-    else if (Len >= 2)
-      VT = MVT::i16;
-    else {
-      assert(Len == 1);
-      VT = MVT::i8;
+    if (!Alignment || Alignment >= 4) {
+      if (Len >= 4)
+        VT = MVT::i32;
+      else if (Len >= 2)
+        VT = MVT::i16;
+      else {
+        assert (Len == 1 && "Expected a length of 1!");
+        VT = MVT::i8;
+      }
+    } else {
+      // Bound based on alignment.
+      if (Len >= 2 && Alignment == 2)
+        VT = MVT::i16;
+      else {
+        assert (Alignment == 1 && "Expected an alignment of 1!");
+        VT = MVT::i8;
+      }
     }
 
     bool RV;
@@ -2517,7 +2533,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
         if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
             !ARMComputeAddress(MTI.getRawSource(), Src))
           return false;
-        if (ARMTryEmitSmallMemCpy(Dest, Src, Len))
+        unsigned Alignment = MTI.getAlignment();
+        if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
           return true;
       }
     }
@@ -2582,11 +2599,13 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
 
   unsigned Opc;
   bool isBoolZext = false;
+  const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32);
   if (!SrcVT.isSimple()) return 0;
   switch (SrcVT.getSimpleVT().SimpleTy) {
   default: return 0;
   case MVT::i16:
     if (!Subtarget->hasV6Ops()) return 0;
+    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
     if (isZExt)
       Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH;
     else
@@ -2594,6 +2613,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
     break;
   case MVT::i8:
     if (!Subtarget->hasV6Ops()) return 0;
+    RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
     if (isZExt)
       Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB;
     else
@@ -2601,6 +2621,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
     break;
   case MVT::i1:
     if (isZExt) {
+      RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
       Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
       isBoolZext = true;
       break;
@@ -2608,7 +2629,7 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT,
     return 0;
   }
 
-  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32));
+  unsigned ResultReg = createResultReg(RC);
   MachineInstrBuilder MIB;
   MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
         .addReg(SrcReg);
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index f9ab225fe5..e335141d24 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -15,17 +15,17 @@
 #include "ARMBaseInstrInfo.h"
 #include "ARMBaseRegisterInfo.h"
 #include "ARMMachineFunctionInfo.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Function.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Function.h"
+#include "llvm/CallingConv.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Function.h"
+#include "llvm/Function.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
 // @LOCALMOD-START
 #include "llvm/CodeGen/MachineModuleInfo.h"
 // @LOCALMOD-END
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 8135d58aff..00214663c7 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -17,23 +17,23 @@
 #include "ARMTargetMachine.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
 
 // @LOCALMOD-START
 #include "llvm/Support/CommandLine.h"
@@ -3428,6 +3428,15 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
     return SelectAtomic64(N, ARM::ATOMSWAP6432);
   case ARMISD::ATOMCMPXCHG64_DAG:
     return SelectAtomic64(N, ARM::ATOMCMPXCHG6432);
+
+  case ARMISD::ATOMMIN64_DAG:
+    return SelectAtomic64(N, ARM::ATOMMIN6432);
+  case ARMISD::ATOMUMIN64_DAG:
+    return SelectAtomic64(N, ARM::ATOMUMIN6432);
+  case ARMISD::ATOMMAX64_DAG:
+    return SelectAtomic64(N, ARM::ATOMMAX6432);
+  case ARMISD::ATOMUMAX64_DAG:
+    return SelectAtomic64(N, ARM::ATOMUMAX6432);
   }
 
   return SelectCode(N);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index e1f5aa3fed..45d52b62ac 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -23,14 +23,9 @@
 #include "ARMTargetMachine.h"
 #include "ARMTargetObjectFile.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instruction.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Type.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/IntrinsicLowering.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -40,14 +35,19 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/MC/MCSectionMachO.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Type.h"
 
 // @LOCALMOD-START
 namespace llvm {
@@ -556,6 +556,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);
     setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);
 
+    // NEON does not have single instruction CTPOP for vectors with element
+    // types wider than 8-bits.  However, custom lowering can leverage the
+    // v8i8/v16i8 vcnt instruction.
+    setOperationAction(ISD::CTPOP,      MVT::v2i32, Custom);
+    setOperationAction(ISD::CTPOP,      MVT::v4i32, Custom);
+    setOperationAction(ISD::CTPOP,      MVT::v4i16, Custom);
+    setOperationAction(ISD::CTPOP,      MVT::v8i16, Custom);
+
     setTargetDAGCombine(ISD::INTRINSIC_VOID);
     setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
@@ -724,7 +732,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Custom);
     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Custom);
     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Custom);
-    setOperationAction(ISD::ATOMIC_SWAP,  MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
+    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Custom);
     // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc.
     setInsertFencesForAtomic(true);
@@ -862,9 +874,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setSchedulingPreference(Sched::Hybrid);
 
   //// temporary - rewrite interface to use type
-  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;
-  maxStoresPerMemset = 16;
+  maxStoresPerMemset = 8;
   maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
+  maxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
+  maxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
+  maxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
+  maxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
 
   // On ARM arguments smaller than 4 bytes are extended, so all arguments
   // are at least 4 bytes aligned.
@@ -1950,6 +1965,17 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   return true;
 }
 
+bool
+ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+                                  MachineFunction &MF, bool isVarArg,
+                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                  LLVMContext &Context) const {
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+  return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
+                                                    isVarArg));
+}
+
 SDValue
 ARMTargetLowering::LowerReturn(SDValue Chain,
                                CallingConv::ID CallConv, bool isVarArg,
@@ -3777,6 +3803,114 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
 }
 
+/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
+/// for each 16-bit element from operand, repeated.  The basic idea is to
+/// leverage vcnt to get the 8-bit counts, gather and add the results.
+///
+/// Trace for v4i16:
+/// input    = [v0    v1    v2    v3   ] (vi 16-bit element)
+/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
+/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
+/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6] 
+///            [b0 b1 b2 b3 b4 b5 b6 b7]
+///           +[b1 b0 b3 b2 b5 b4 b7 b6]
+/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
+/// vuzp:    = [k0 k1 k2 k3 k0 k1 k2 k3]  each ki is 8-bits)
+static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
+  SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
+  SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
+  SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
+  SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
+  return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
+}
+
+/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
+/// bit-count for each 16-bit element from the operand.  We need slightly
+/// different sequencing for v4i16 and v8i16 to stay within NEON's available
+/// 64/128-bit registers.
+/// 
+/// Trace for v4i16:
+/// input           = [v0    v1    v2    v3    ] (vi 16-bit element)
+/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
+/// v8i16:Extended  = [k0    k1    k2    k3    k0    k1    k2    k3    ]
+/// v4i16:Extracted = [k0    k1    k2    k3    ]
+static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
+  if (VT.is64BitVector()) {
+    SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
+                       DAG.getIntPtrConstant(0));
+  } else {
+    SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
+                                    BitCounts, DAG.getIntPtrConstant(0));
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
+  }
+}
+
+/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
+/// bit-count for each 32-bit element from the operand.  The idea here is
+/// to split the vector into 16-bit elements, leverage the 16-bit count
+/// routine, and then combine the results.
+///
+/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
+/// input    = [v0    v1    ] (vi: 32-bit elements)
+/// Bitcast  = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
+/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
+/// vrev: N0 = [k1 k0 k3 k2 ] 
+///            [k0 k1 k2 k3 ]
+///       N1 =+[k1 k0 k3 k2 ]
+///            [k0 k2 k1 k3 ]
+///       N2 =+[k1 k3 k0 k2 ]
+///            [k0    k2    k1    k3    ]
+/// Extended =+[k1    k3    k0    k2    ]
+///            [k0    k2    ]
+/// Extracted=+[k1    k3    ]
+///
+static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
+
+  SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
+  SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
+  SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
+  SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
+  SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
+
+  if (VT.is64BitVector()) {
+    SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
+                       DAG.getIntPtrConstant(0));
+  } else {
+    SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
+                                    DAG.getIntPtrConstant(0));
+    return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
+  }
+}
+
+static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
+                          const ARMSubtarget *ST) {
+  EVT VT = N->getValueType(0);
+
+  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
+  assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
+          VT == MVT::v4i16 || VT == MVT::v8i16) &&
+         "Unexpected type for custom ctpop lowering");
+
+  if (VT.getVectorElementType() == MVT::i32)
+    return lowerCTPOP32BitElements(N, DAG);
+  else
+    return lowerCTPOP16BitElements(N, DAG);
+}
+
 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
                           const ARMSubtarget *ST) {
   EVT VT = N->getValueType(0);
@@ -5162,16 +5296,76 @@ static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
   return false;
 }
 
-/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
-/// load, or BUILD_VECTOR with extended elements, return the unextended value.
-static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
+/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
+/// We insert the required extension here to get the vector to fill a D register.
+static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
+                                            const EVT &OrigTy,
+                                            const EVT &ExtTy,
+                                            unsigned ExtOpcode) {
+  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
+  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
+  // 64-bits we need to insert a new extension so that it will be 64-bits.
+  assert(ExtTy.is128BitVector() && "Unexpected extension size");
+  if (OrigTy.getSizeInBits() >= 64)
+    return N;
+
+  // Must extend size to at least 64 bits to be used as an operand for VMULL.
+  MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy;
+  EVT NewVT;
+  switch (OrigSimpleTy) {
+  default: llvm_unreachable("Unexpected Orig Vector Type");
+  case MVT::v2i8:
+  case MVT::v2i16:
+    NewVT = MVT::v2i32;
+    break;
+  case MVT::v4i8:
+    NewVT = MVT::v4i16;
+    break;
+  }
+  return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
+}
+
+/// SkipLoadExtensionForVMULL - return a load of the original vector size that
+/// does not do any sign/zero extension. If the original vector is less
+/// than 64 bits, an appropriate extension will be added after the load to
+/// reach a total size of 64 bits. We have to add the extension separately
+/// because ARM does not have a sign/zero extending load for vectors.
+static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
+  SDValue NonExtendingLoad =
+    DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
+                LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+                LD->isNonTemporal(), LD->isInvariant(),
+                LD->getAlignment());
+  unsigned ExtOp = 0;
+  switch (LD->getExtensionType()) {
+  default: llvm_unreachable("Unexpected LoadExtType");
+  case ISD::EXTLOAD:
+  case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break;
+  case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break;
+  }
+  MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy;
+  MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy;
+  return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG,
+                                      MemType, ExtType, ExtOp);
+}
+
+/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
+/// extending load, or BUILD_VECTOR with extended elements, return the
+/// unextended value. The unextended vector should be 64 bits so that it can
+/// be used as an operand to a VMULL instruction. If the original vector size
+/// before extension is less than 64 bits we add a an extension to resize
+/// the vector to 64 bits.
+static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
   if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
-    return N->getOperand(0);
+    return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
+                                        N->getOperand(0)->getValueType(0),
+                                        N->getValueType(0),
+                                        N->getOpcode());
+
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
-    return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
-                       LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
-                       LD->isNonTemporal(), LD->isInvariant(),
-                       LD->getAlignment());
+    return SkipLoadExtensionForVMULL(LD, DAG);
+
   // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
   // have been legalized as a BITCAST from v4i32.
   if (N->getOpcode() == ISD::BITCAST) {
@@ -5226,7 +5420,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
   // Multiplications are only custom-lowered for 128-bit vectors so that
   // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
   EVT VT = Op.getValueType();
-  assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+  assert(VT.is128BitVector() && VT.isInteger() &&
+         "unexpected type for custom-lowering ISD::MUL");
   SDNode *N0 = Op.getOperand(0).getNode();
   SDNode *N1 = Op.getOperand(1).getNode();
   unsigned NewOpc = 0;
@@ -5269,9 +5464,9 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
   // Legalize to a VMULL instruction.
   DebugLoc DL = Op.getDebugLoc();
   SDValue Op0;
-  SDValue Op1 = SkipExtension(N1, DAG);
+  SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
   if (!isMLA) {
-    Op0 = SkipExtension(N0, DAG);
+    Op0 = SkipExtensionForVMULL(N0, DAG);
     assert(Op0.getValueType().is64BitVector() &&
            Op1.getValueType().is64BitVector() &&
            "unexpected types for extended operands to VMULL");
@@ -5286,8 +5481,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
   //   vaddl q0, d4, d5
   //   vmovl q1, d6
   //   vmul  q0, q0, q1
-  SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG);
-  SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG);
+  SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
+  SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
   EVT Op1VT = Op1.getValueType();
   return DAG.getNode(N0->getOpcode(), DL, VT,
                      DAG.getNode(NewOpc, DL, VT,
@@ -5617,6 +5812,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SRL_PARTS:
   case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
   case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
+  case ISD::CTPOP:         return LowerCTPOP(Op.getNode(), DAG, Subtarget);
   case ISD::SETCC:         return LowerVSETCC(Op, DAG);
   case ISD::ConstantFP:    return LowerConstantFP(Op, DAG, Subtarget);
   case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
@@ -5682,6 +5878,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::ATOMIC_CMP_SWAP:
     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
     return;
+  case ISD::ATOMIC_LOAD_MIN:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
+    return;
+  case ISD::ATOMIC_LOAD_UMIN:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
+    return;
+  case ISD::ATOMIC_LOAD_MAX:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
+    return;
+  case ISD::ATOMIC_LOAD_UMAX:
+    ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
+    return;
   }
   if (Res.getNode())
     Results.push_back(Res);
@@ -6021,7 +6229,8 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
 MachineBasicBlock *
 ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
                                       unsigned Op1, unsigned Op2,
-                                      bool NeedsCarry, bool IsCmpxchg) const {
+                                      bool NeedsCarry, bool IsCmpxchg,
+                                      bool IsMinMax, ARMCC::CondCodes CC) const {
   // This also handles ATOMIC_SWAP, indicated by Op1==0.
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
 
@@ -6050,16 +6259,15 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
 
   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *contBB = 0, *cont2BB = 0;
-  if (IsCmpxchg) {
+  if (IsCmpxchg || IsMinMax)
     contBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  if (IsCmpxchg)
     cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
-  }
   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
   MF->insert(It, loopMBB);
-  if (IsCmpxchg) {
-    MF->insert(It, contBB);
-    MF->insert(It, cont2BB);
-  }
+  if (IsCmpxchg || IsMinMax) MF->insert(It, contBB);
+  if (IsCmpxchg) MF->insert(It, cont2BB);
   MF->insert(It, exitMBB);
 
   // Transfer the remainder of BB and its successor edges to exitMBB.
@@ -6097,6 +6305,22 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
   // Load
   unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
   unsigned GPRPair1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+  unsigned GPRPair2;
+  if (IsMinMax) {
+    //We need an extra double register for doing min/max.
+    unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+    unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+    GPRPair2 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+    BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef);
+    BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+      .addReg(undef)
+      .addReg(vallo)
+      .addImm(ARM::gsub_0);
+    BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair2)
+      .addReg(r1)
+      .addReg(valhi)
+      .addImm(ARM::gsub_1);
+  }
 
   AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
                  .addReg(GPRPair0, RegState::Define).addReg(ptr));
@@ -6142,7 +6366,8 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
         .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
     unsigned tmpRegHi = MRI.createVirtualRegister(TRC);
     AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi)
-                   .addReg(desthi).addReg(valhi)).addReg(0);
+                   .addReg(desthi).addReg(valhi))
+        .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
 
     unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
     BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
@@ -6169,10 +6394,20 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
       .addReg(valhi)
       .addImm(ARM::gsub_1);
   }
+  unsigned GPRPairStore = GPRPair1;
+  if (IsMinMax) {
+    // Compare and branch to exit block.
+    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+      .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR);
+    BB->addSuccessor(exitMBB);
+    BB->addSuccessor(contBB);
+    BB = contBB;
+    GPRPairStore = GPRPair2;
+  }
 
   // Store
   AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
-                 .addReg(GPRPair1).addReg(ptr));
+                 .addReg(GPRPairStore).addReg(ptr));
   // Cmp+jump
   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                  .addReg(storesuccess).addImm(0));
@@ -7174,6 +7409,26 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
                               isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
                               /*NeedsCarry*/ false, /*IsCmpxchg*/true);
+  case ARM::ATOMMIN6432:
+    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+                              /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+                              /*IsMinMax*/ true, ARMCC::LE);
+  case ARM::ATOMMAX6432:
+    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+                              /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+                              /*IsMinMax*/ true, ARMCC::GE);
+  case ARM::ATOMUMIN6432:
+    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+                              /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+                              /*IsMinMax*/ true, ARMCC::LS);
+  case ARM::ATOMUMAX6432:
+    return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
+                              isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
+                              /*NeedsCarry*/ true, /*IsCmpxchg*/false,
+                              /*IsMinMax*/ true, ARMCC::HS);
 
   case ARM::tMOVCCr_pseudo: {
     // To "insert" a SELECT_CC instruction, we actually have to insert the
@@ -9445,7 +9700,7 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
   return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
 }
 
-bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
   // The AllowsUnaliged flag models the SCTLR.A setting in ARM cpus
   bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
 
@@ -9454,15 +9709,27 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
     return false;
   case MVT::i8:
   case MVT::i16:
-  case MVT::i32:
+  case MVT::i32: {
     // Unaligned access can use (for example) LRDB, LRDH, LDR
-    return AllowsUnaligned;
+    if (AllowsUnaligned) {
+      if (Fast)
+        *Fast = Subtarget->hasV7Ops();
+      return true;
+    }
+    return false;
+  }
   case MVT::f64:
-  case MVT::v2f64:
+  case MVT::v2f64: {
     // For any little-endian targets with neon, we can support unaligned ld/st
     // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
     // A big-endian target may also explictly support unaligned accesses
-    return Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian());
+    if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
+      if (Fast)
+        *Fast = true;
+      return true;
+    }
+    return false;
+  }
   }
 }
 
@@ -9481,12 +9748,17 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
 
   // See if we can use NEON instructions for this...
   if (IsZeroVal &&
-      !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat) &&
-      Subtarget->hasNEON()) {
-    if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
-      return MVT::v4i32;
-    } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
-      return MVT::v2i32;
+      Subtarget->hasNEON() &&
+      !F->getFnAttributes().hasAttribute(Attributes::NoImplicitFloat)) {
+    bool Fast;
+    if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) ||
+                       (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) &&
+                        Fast))) {
+      return MVT::v2f64;
+    } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) ||
+                             (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) &&
+                              Fast))) {
+      return MVT::f64;
     }
   }
 
@@ -9501,6 +9773,27 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
   return MVT::Other;
 }
 
+bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+  if (Val.getOpcode() != ISD::LOAD)
+    return false;
+
+  EVT VT1 = Val.getValueType();
+  if (!VT1.isSimple() || !VT1.isInteger() ||
+      !VT2.isSimple() || !VT2.isInteger())
+    return false;
+
+  switch (VT1.getSimpleVT().SimpleTy) {
+  default: break;
+  case MVT::i1:
+  case MVT::i8:
+  case MVT::i16:
+    // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
+    return true;
+  }
+
+  return false;
+}
+
 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
   if (V < 0)
     return false;
@@ -10277,6 +10570,24 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
   return false;
 }
 
+bool ARMTargetLowering::isIntImmLegal(const APInt &Imm, EVT VT) const {
+  if (VT.getSizeInBits() > 32)
+    return false;
+
+  int32_t ImmVal = Imm.getSExtValue();
+  if (!Subtarget->isThumb()) {
+    return (ImmVal >= 0 && ImmVal < 65536) ||
+      (ARM_AM::getSOImmVal(ImmVal) != -1) ||
+      (ARM_AM::getSOImmVal(~ImmVal) != -1);
+  } else if (Subtarget->isThumb2()) {
+    return (ImmVal >= 0 && ImmVal < 65536) ||
+      (ARM_AM::getT2SOImmVal(ImmVal) != -1) ||
+      (ARM_AM::getT2SOImmVal(~ImmVal) != -1);
+  } else /*Thumb1*/ {
+    return (ImmVal >= 0 && ImmVal < 256);
+  }
+}
+
 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
 /// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
 /// specified in the intrinsic calls.
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 0deddc38fd..c7ee13798f 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -17,11 +17,11 @@
 
 #include "ARM.h"
 #include "ARMSubtarget.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <vector>
 
 namespace llvm {
@@ -235,7 +235,11 @@ namespace llvm {
       ATOMAND64_DAG,
       ATOMNAND64_DAG,
       ATOMSWAP64_DAG,
-      ATOMCMPXCHG64_DAG
+      ATOMCMPXCHG64_DAG,
+      ATOMMIN64_DAG,
+      ATOMUMIN64_DAG,
+      ATOMMAX64_DAG,
+      ATOMUMAX64_DAG
     };
   }
 
@@ -284,8 +288,9 @@ namespace llvm {
     bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
 
     /// allowsUnalignedMemoryAccesses - Returns true if the target allows
-    /// unaligned memory accesses. of the specified type.
-    virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+    /// unaligned memory accesses of the specified type. Returns whether it
+    /// is "fast" by reference in the second argument.
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
 
     virtual EVT getOptimalMemOpType(uint64_t Size,
                                     unsigned DstAlign, unsigned SrcAlign,
@@ -293,6 +298,9 @@ namespace llvm {
                                     bool MemcpyStrSrc,
                                     MachineFunction &MF) const;
 
+    using TargetLowering::isZExtFree;
+    virtual bool isZExtFree(SDValue Val, EVT VT2) const;
+
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
@@ -382,6 +390,8 @@ namespace llvm {
     /// materialize the FP immediate as a load from a constant pool.
     virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
 
+    virtual bool isIntImmLegal(const APInt &Imm, EVT VT) const;
+
     virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                                     const CallInst &I,
                                     unsigned Intrinsic) const;
@@ -506,6 +516,12 @@ namespace llvm {
                                     const SmallVectorImpl<SDValue> &OutVals,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
                                            SelectionDAG& DAG) const;
+
+    virtual bool CanLowerReturn(CallingConv::ID CallConv,
+                                MachineFunction &MF, bool isVarArg,
+                                const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                LLVMContext &Context) const;
+
     virtual SDValue
       LowerReturn(SDValue Chain,
                   CallingConv::ID CallConv, bool isVarArg,
@@ -537,7 +553,9 @@ namespace llvm {
                                           unsigned Op1,
                                           unsigned Op2,
                                           bool NeedsCarry = false,
-                                          bool IsCmpxchg = false) const;
+                                          bool IsCmpxchg = false,
+                                          bool IsMinMax = false,
+                                          ARMCC::CondCodes CC = ARMCC::AL) const;
     MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI,
                                                MachineBasicBlock *BB,
                                                unsigned Size,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index e55e436a7c..329ad54318 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -1676,6 +1676,18 @@ def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
                                  (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2,
                                       GPR:$set1, GPR:$set2),
                                  NoItinerary, []>;
+def ATOMMIN6432  : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+                              (ins GPR:$addr, GPR:$src1, GPR:$src2),
+                              NoItinerary, []>;
+def ATOMUMIN6432  : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+                              (ins GPR:$addr, GPR:$src1, GPR:$src2),
+                              NoItinerary, []>;
+def ATOMMAX6432  : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+                              (ins GPR:$addr, GPR:$src1, GPR:$src2),
+                              NoItinerary, []>;
+def ATOMUMAX6432  : PseudoInst<(outs GPR:$dst1, GPR:$dst2),
+                              (ins GPR:$addr, GPR:$src1, GPR:$src2),
+                              NoItinerary, []>;
 }
 
 def HINT : AI<(outs), (ins imm0_255:$imm), MiscFrm, NoItinerary,
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index f1eb83382d..02b68e25d7 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -2315,13 +2315,15 @@ defm t2ORN  : T2I_bin_irs<0b0011, "orn",
 /// changed to modify CPSR.
 multiclass T2I_un_irs<bits<4> opcod, string opc,
                      InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
-                      PatFrag opnode, bit Cheap = 0, bit ReMat = 0> {
+                      PatFrag opnode,
+                      bit Cheap = 0, bit ReMat = 0, bit MoveImm = 0> {
    // shifted imm
    def i : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), iii,
                 opc, "\t$Rd, $imm",
                 [(set rGPR:$Rd, (opnode t2_so_imm:$imm))]> {
      let isAsCheapAsAMove = Cheap;
      let isReMaterializable = ReMat;
+     let isMoveImm = MoveImm;
      let Inst{31-27} = 0b11110;
      let Inst{25} = 0;
      let Inst{24-21} = opcod;
@@ -2355,7 +2357,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
 let AddedComplexity = 1 in
 defm t2MVN  : T2I_un_irs <0b0011, "mvn",
                           IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
-                          UnOpFrag<(not node:$Src)>, 1, 1>;
+                          UnOpFrag<(not node:$Src)>, 1, 1, 1>;
 
 let AddedComplexity = 1 in
 def : T2Pat<(and     rGPR:$src, t2_so_imm_not:$imm),
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 254d8f6b7c..caa5135bfc 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -17,12 +17,12 @@
 #include "ARMConstantPoolValue.h"
 #include "ARMRelocations.h"
 #include "ARMSubtarget.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cstdlib>
 using namespace llvm;
 
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index 7928184427..23a6a9b512 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -15,12 +15,12 @@
 #define ARMJITINFO_H
 
 #include "ARMMachineFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/Target/TargetJITInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
 
 namespace llvm {
   class ARMTargetMachine;
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index a8c8dce0cc..8949b50f67 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -18,8 +18,12 @@
 #include "ARMBaseRegisterInfo.h"
 #include "ARMMachineFunctionInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -28,18 +32,14 @@
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 3dd0848058..1bd2a93a8b 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -15,8 +15,8 @@
 #include "ARM.h"
 #include "ARMAsmPrinter.h"
 #include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/Constants.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Constants.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Target/Mangler.h"
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index c0ac04b600..88d96c0be8 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -15,10 +15,10 @@
 #define ARMMACHINEFUNCTIONINFO_H
 
 #include "ARMSubtarget.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/BitVector.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 namespace llvm {
 
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 4c44f69f4d..72be065d64 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -13,8 +13,8 @@
 
 #define DEBUG_TYPE "arm-selectiondag-info"
 #include "ARMTargetMachine.h"
-#include "llvm/DerivedTypes.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DerivedTypes.h"
 using namespace llvm;
 
 ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index fc67d418ea..f5ea64d42b 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -12,11 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARMSubtarget.h"
-#include "ARMBaseRegisterInfo.h"
 #include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
 #include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 #define GET_SUBTARGETINFO_TARGET_DESC
 #define GET_SUBTARGETINFO_CTOR
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index f07ad350a2..28975f9243 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -15,9 +15,9 @@
 #define ARMSUBTARGET_H
 
 #include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
 
 #define GET_SUBTARGETINFO_HEADER
@@ -39,7 +39,7 @@ class StringRef;
 class ARMSubtarget : public ARMGenSubtargetInfo {
 protected:
   enum ARMProcFamilyEnum {
-    Others, CortexA8, CortexA9, CortexA15, Swift
+    Others, CortexA5, CortexA8, CortexA9, CortexA15, Swift
   };
 
   /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
@@ -218,6 +218,7 @@ protected:
   bool hasV6T2Ops() const { return HasV6T2Ops; }
   bool hasV7Ops()   const { return HasV7Ops;  }
 
+  bool isCortexA5() const { return ARMProcFamily == CortexA5; }
   bool isCortexA8() const { return ARMProcFamily == CortexA8; }
   bool isCortexA9() const { return ARMProcFamily == CortexA9; }
   bool isCortexA15() const { return ARMProcFamily == CortexA15; }
@@ -256,7 +257,9 @@ protected:
 
   bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; }
   bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
-  bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }  // @LOCALMOD
+  bool isTargetNaCl() const {
+    return TargetTriple.getOS() == Triple::NaCl;
+  }
   bool isTargetELF() const { return !isTargetDarwin(); }
 
   bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index fed2d99e65..533be838d8 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,11 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARMTargetMachine.h"
-#include "ARMFrameLowering.h"
 #include "ARM.h"
-#include "llvm/PassManager.h"
+#include "ARMFrameLowering.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index cd6921e1ae..2d265afa1b 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -14,20 +14,20 @@
 #ifndef ARMTARGETMACHINE_H
 #define ARMTARGETMACHINE_H
 
-#include "ARMInstrInfo.h"
 #include "ARMFrameLowering.h"
-#include "ARMJITInfo.h"
-#include "ARMSubtarget.h"
 #include "ARMISelLowering.h"
+#include "ARMInstrInfo.h"
+#include "ARMJITInfo.h"
 #include "ARMSelectionDAGInfo.h"
-#include "Thumb1InstrInfo.h"
+#include "ARMSubtarget.h"
 #include "Thumb1FrameLowering.h"
+#include "Thumb1InstrInfo.h"
 #include "Thumb2InstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetTransformImpl.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/DataLayout.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetTransformImpl.h"
 
 // @LOCALMOD-START
 #include "llvm/Support/CommandLine.h"
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index ca57541726..c954550bb8 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -9,16 +9,16 @@
 
 #include "ARMTargetObjectFile.h"
 #include "ARMSubtarget.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h" // @LOCALMOD
 #include "llvm/CodeGen/MachineModuleInfo.h" // @LOCALMOD
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ELF.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 using namespace dwarf;
 
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
index fda8536fcf..3ada0f2af6 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -8,19 +8,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/ARMBaseInfo.h"
-
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCTargetAsmLexer.h"
-
 #include "llvm/Support/TargetRegistry.h"
-
-#include "llvm/ADT/StringSwitch.h"
-
-#include <string>
 #include <map>
+#include <string>
 
 using namespace llvm;
 
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 7383aa21ce..4685b1d193 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -7,31 +7,31 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/ARMBaseInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
 #include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCTargetAsmParser.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index f00142de50..65c8c1a561 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -9,21 +9,21 @@
 
 #define DEBUG_TYPE "arm-disassembler"
 
+#include "llvm/MC/MCDisassembler.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
-#include "MCTargetDesc/ARMMCExpr.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCExpr.h"
 #include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
 #include <vector>
@@ -1281,7 +1281,13 @@ static DecodeStatus DecodeBitfieldMaskOperand(MCInst &Inst, unsigned Val,
   unsigned lsb = fieldFromInstruction(Val, 0, 5);
 
   DecodeStatus S = MCDisassembler::Success;
-  if (lsb > msb) Check(S, MCDisassembler::SoftFail);
+  if (lsb > msb) {
+    Check(S, MCDisassembler::SoftFail);
+    // The check above will cause the warning for the "potentially undefined
+    // instruction encoding" but we can't build a bad MCOperand value here
+    // with a lsb > msb or else printing the MCInst will cause a crash.
+    lsb = msb;
+  }
 
   uint32_t msb_mask = 0xFFFFFFFF;
   if (msb != 31) msb_mask = (1U << (msb+1)) - 1;
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 75de07e7a9..26a4221086 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -13,11 +13,11 @@
 
 #define DEBUG_TYPE "asm-printer"
 #include "ARMInstPrinter.h"
-#include "MCTargetDesc/ARMBaseInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/MC/MCInst.h"
+#include "MCTargetDesc/ARMBaseInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/raw_ostream.h"
@@ -358,7 +358,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
     printRegName(O, Reg);
   } else if (Op.isImm()) {
     O << markup("<imm:")
-      << '#' << Op.getImm()
+      << '#' << formatImm(Op.getImm())
       << markup(">");
   } else {
     assert(Op.isExpr() && "unknown operand kind in printOperand");
@@ -384,7 +384,7 @@ void ARMInstPrinter::printThumbLdrLabelOperand(const MCInst *MI, unsigned OpNum,
     O << *MO1.getExpr();
   else if (MO1.isImm()) {
     O << markup("<mem:") << "[pc, "
-      << markup("<imm:") << "#" << MO1.getImm()
+      << markup("<imm:") << "#" << formatImm(MO1.getImm())
       << markup(">]>", "]");
   }
   else
@@ -976,7 +976,7 @@ void ARMInstPrinter::printAdrLabelOperand(const MCInst *MI, unsigned OpNum,
 void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum,
                                             raw_ostream &O) {
   O << markup("<imm:")
-    << "#" << MI->getOperand(OpNum).getImm() * 4
+    << "#" << formatImm(MI->getOperand(OpNum).getImm() * 4)
     << markup(">");
 }
 
@@ -984,7 +984,7 @@ void ARMInstPrinter::printThumbSRImm(const MCInst *MI, unsigned OpNum,
                                      raw_ostream &O) {
   unsigned Imm = MI->getOperand(OpNum).getImm();
   O << markup("<imm:")
-    << "#" << (Imm == 0 ? 32 : Imm)
+    << "#" << formatImm((Imm == 0 ? 32 : Imm))
     << markup(">");
 }
 
@@ -1041,7 +1041,7 @@ void ARMInstPrinter::printThumbAddrModeImm5SOperand(const MCInst *MI,
   if (unsigned ImmOffs = MO2.getImm()) {
     O << ", "
       << markup("<imm:")
-      << "#" << ImmOffs * Scale
+      << "#" << formatImm(ImmOffs * Scale)
       << markup(">");
   }
   O << "]" << markup(">");
@@ -1192,7 +1192,7 @@ void ARMInstPrinter::printT2AddrModeImm0_1020s4Operand(const MCInst *MI,
   if (MO2.getImm()) {
     O << ", "
       << markup("<imm:")
-      << "#" << MO2.getImm() * 4
+      << "#" << formatImm(MO2.getImm() * 4)
       << markup(">");
   }
   O << "]" << markup(">");
@@ -1282,7 +1282,7 @@ void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum,
                                             raw_ostream &O) {
   unsigned Imm = MI->getOperand(OpNum).getImm();
   O << markup("<imm:")
-    << "#" << Imm + 1
+    << "#" << formatImm(Imm + 1)
     << markup(">");
 }
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index b99dd7a850..45fef17785 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -8,9 +8,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "MCTargetDesc/ARMFixupKinds.h"
-#include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/MC/MCAsmBackend.h"
 #include "MCTargetDesc/ARMMCNaCl.h" // @LOCALMOD
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
@@ -22,7 +23,6 @@
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
-#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Object/MachOFormat.h"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 253d1fa2ab..d291304e83 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -7,17 +7,17 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/ARMFixupKinds.h"
 #include "MCTargetDesc/ARMMCTargetDesc.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "MCTargetDesc/ARMFixupKinds.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index d0e127a8f3..09e15afd3c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -12,11 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "MCTargetDesc/ARMFixupKinds.h"
 #include "MCTargetDesc/ARMMCExpr.h"
-#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
@@ -24,8 +26,6 @@
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 22e14a2281..fc8505b052 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -9,8 +9,8 @@
 
 #define DEBUG_TYPE "armmcexpr"
 #include "ARMMCExpr.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
 using namespace llvm;
 
 const ARMMCExpr*
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index bb40b4b0de..e7d3e7cb0b 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARMMCTargetDesc.h"
-#include "ARMMCAsmInfo.h"
 #include "ARMBaseInfo.h"
+#include "ARMMCAsmInfo.h"
 #include "InstPrinter/ARMInstPrinter.h"
 #include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCInstrAnalysis.h"
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 2154c93176..b9efe74b41 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -7,17 +7,18 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "MCTargetDesc/ARMMCTargetDesc.h"
 #include "MCTargetDesc/ARMBaseInfo.h"
 #include "MCTargetDesc/ARMFixupKinds.h"
 #include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCFixupKindInfo.h"
 #include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCMachObjectWriter.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Object/MachOFormat.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 70643bcda3..2e266c2e96 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -16,16 +16,16 @@
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMSubtarget.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 static cl::opt<bool>
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 735b255759..095736d52a 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -15,8 +15,8 @@
 #include "ARM.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/MC/MCInst.h"
 
 using namespace llvm;
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index a39b722cae..957a34d11d 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -18,21 +18,21 @@
 #include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
 extern cl::opt<bool> ReuseFrameIndexVals;
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index d54aa93532..97c254ce75 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -11,12 +11,12 @@
 #include "ARM.h"
 #include "ARMMachineFunctionInfo.h"
 #include "Thumb2InstrInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumITs,        "Number of IT blocks inserted");
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index e9e20ddd87..9fba8227f6 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -51,7 +51,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
   MachineBasicBlock *MBB = Tail->getParent();
   ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
   if (!AFI->hasITBlocks()) {
-    TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+    TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
     return;
   }
 
@@ -65,7 +65,7 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
     --MBBI;
 
   // Actually replace the tail.
-  TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+  TargetInstrInfo::ReplaceTailWithBranchTo(Tail, NewDest);
 
   // Fix up IT.
   if (CC != ARMCC::AL) {
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 29a87d0162..9c018bc37a 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -16,12 +16,12 @@
 #include "ARM.h"
 #include "ARMBaseInstrInfo.h"
 #include "ARMSubtarget.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
 using namespace llvm;
 
 Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index f18f491f49..d1cde1bf98 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -9,19 +9,19 @@
 
 #define DEBUG_TYPE "t2-reduce-size"
 #include "ARM.h"
-#include "ARMBaseRegisterInfo.h"
 #include "ARMBaseInstrInfo.h"
+#include "ARMBaseRegisterInfo.h"
 #include "ARMSubtarget.h"
-#include "Thumb2InstrInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumNarrows,  "Number of 32-bit instrs reduced to 16-bit ones");
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index a85acaaa14..c87a33a8b9 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -1,14 +1,12 @@
 add_llvm_library(LLVMTarget
   Mangler.cpp
   Target.cpp
-  TargetInstrInfo.cpp
   TargetIntrinsicInfo.cpp
   TargetJITInfo.cpp
   TargetLibraryInfo.cpp
   TargetLoweringObjectFile.cpp
   TargetMachine.cpp
   TargetMachineC.cpp
-  TargetRegisterInfo.cpp
   TargetSubtargetInfo.cpp
   TargetTransformImpl.cpp
   )
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 5c909903f9..788c6e6866 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -13,25 +13,25 @@
 //===----------------------------------------------------------------------===//
 
 #include "CPPTargetMachine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/CallingConv.h"
+#include "llvm/Config/config.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instruction.h"
 #include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/config.h"
 #include <algorithm>
 #include <cstdio>
 #include <map>
@@ -141,7 +141,7 @@ namespace {
     std::string getCppName(const Value* val);
     inline void printCppName(const Value* val);
 
-    void printAttributes(const AttrListPtr &PAL, const std::string &name);
+    void printAttributes(const AttributeSet &PAL, const std::string &name);
     void printType(Type* Ty);
     void printTypes(const Module* M);
 
@@ -464,9 +464,9 @@ void CppWriter::printCppName(const Value* val) {
   printEscapedString(getCppName(val));
 }
 
-void CppWriter::printAttributes(const AttrListPtr &PAL,
+void CppWriter::printAttributes(const AttributeSet &PAL,
                                 const std::string &name) {
-  Out << "AttrListPtr " << name << "_PAL;";
+  Out << "AttributeSet " << name << "_PAL;";
   nl(Out);
   if (!PAL.isEmpty()) {
     Out << '{'; in(); nl(Out);
@@ -518,7 +518,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
       Out << "Attrs.push_back(PAWI);";
       nl(Out);
     }
-    Out << name << "_PAL = AttrListPtr::get(mod->getContext(), Attrs);";
+    Out << name << "_PAL = AttributeSet::get(mod->getContext(), Attrs);";
     nl(Out);
     out(); nl(Out);
     Out << '}'; nl(Out);
@@ -1941,14 +1941,6 @@ void CppWriter::printModule(const std::string& fname,
   }
   nl(Out);
 
-  // Loop over the dependent libraries and emit them.
-  Module::lib_iterator LI = TheModule->lib_begin();
-  Module::lib_iterator LE = TheModule->lib_end();
-  while (LI != LE) {
-    Out << "mod->addLibrary(\"" << *LI << "\");";
-    nl(Out);
-    ++LI;
-  }
   printModuleBody();
   nl(Out) << "return mod;";
   nl(Out,-1) << "}";
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 30d765d6c9..cc6f1fb832 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -14,8 +14,8 @@
 #ifndef CPPTARGETMACHINE_H
 #define CPPTARGETMACHINE_H
 
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
 
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 306084bb8c..aee43ba517 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -9,6 +9,8 @@ tablegen(LLVM HexagonGenSubtargetInfo.inc -gen-subtarget)
 tablegen(LLVM HexagonGenDFAPacketizer.inc -gen-dfa-packetizer)
 add_public_tablegen_target(HexagonCommonTableGen)
 
+set(LLVM_COMMON_DEPENDS intrinsics_gen)
+
 add_llvm_target(HexagonCodeGen
   HexagonAsmPrinter.cpp
   HexagonCallingConvLower.cpp
@@ -33,8 +35,6 @@ add_llvm_target(HexagonCodeGen
   HexagonNewValueJump.cpp
 )
 
-add_dependencies(LLVMHexagonCodeGen intrinsics_gen)
-
 add_subdirectory(TargetInfo)
 add_subdirectory(InstPrinter)
 add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index 451e56206e..8a5ee40590 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -32,6 +32,107 @@ def ArchV5       : SubtargetFeature<"v5", "HexagonArchVersion", "V5",
                                     "Hexagon v5">;
 
 //===----------------------------------------------------------------------===//
+// Hexagon Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasV2T                      : Predicate<"Subtarget.hasV2TOps()">;
+def HasV2TOnly                  : Predicate<"Subtarget.hasV2TOpsOnly()">;
+def NoV2T                       : Predicate<"!Subtarget.hasV2TOps()">;
+def HasV3T                      : Predicate<"Subtarget.hasV3TOps()">;
+def HasV3TOnly                  : Predicate<"Subtarget.hasV3TOpsOnly()">;
+def NoV3T                       : Predicate<"!Subtarget.hasV3TOps()">;
+def HasV4T                      : Predicate<"Subtarget.hasV4TOps()">;
+def NoV4T                       : Predicate<"!Subtarget.hasV4TOps()">;
+def HasV5T                      : Predicate<"Subtarget.hasV5TOps()">;
+def NoV5T                       : Predicate<"!Subtarget.hasV5TOps()">;
+def UseMEMOP                    : Predicate<"Subtarget.useMemOps()">;
+def IEEERndNearV5T              : Predicate<"Subtarget.modeIEEERndNear()">;
+
+//===----------------------------------------------------------------------===//
+// Classes used for relation maps.
+//===----------------------------------------------------------------------===//
+// PredRel - Filter class used to relate non-predicated instructions with their
+// predicated forms.
+class PredRel;
+// PredNewRel - Filter class used to relate predicated instructions with their
+// predicate-new forms.
+class PredNewRel: PredRel;
+// ImmRegRel - Filter class used to relate instructions having reg-reg form
+// with their reg-imm counterparts.
+class ImmRegRel;
+// NewValueRel - Filter class used to relate regular store instructions with
+// their new-value store form.
+class NewValueRel: PredNewRel;
+// NewValueRel - Filter class used to relate load/store instructions having
+// different addressing modes with each other.
+class AddrModeRel: NewValueRel;
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate non-predicate instructions with their
+// predicated formats - true and false.
+//
+
+def getPredOpcode : InstrMapping {
+  let FilterClass = "PredRel";
+  // Instructions with the same BaseOpcode and isNVStore values form a row.
+  let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"];
+  // Instructions with the same predicate sense form a column.
+  let ColFields = ["PredSense"];
+  // The key column is the unpredicated instructions.
+  let KeyCol = [""];
+  // Value columns are PredSense=true and PredSense=false
+  let ValueCols = [["true"], ["false"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate predicated instructions with their .new
+// format.
+//
+def getPredNewOpcode : InstrMapping {
+  let FilterClass = "PredNewRel";
+  let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
+  let ColFields = ["PNewValue"];
+  let KeyCol = [""];
+  let ValueCols = [["new"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Generate mapping table to relate store instructions with their new-value
+// format.
+//
+def getNewValueOpcode : InstrMapping {
+  let FilterClass = "NewValueRel";
+  let RowFields = ["BaseOpcode", "PredSense", "PNewValue"];
+  let ColFields = ["isNVStore"];
+  let KeyCol = ["0"];
+  let ValueCols = [["1"]];
+}
+
+def getBasedWithImmOffset : InstrMapping {
+  let FilterClass = "AddrModeRel";
+  let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore",
+                   "isMEMri", "isFloat"];
+  let ColFields = ["addrMode"];
+  let KeyCol = ["Absolute"];
+  let ValueCols = [["BaseImmOffset"]];
+}
+
+def getBaseWithRegOffset : InstrMapping {
+  let FilterClass = "AddrModeRel";
+  let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"];
+  let ColFields = ["addrMode"];
+  let KeyCol = ["BaseImmOffset"];
+  let ValueCols = [["BaseRegOffset"]];
+}
+
+def getRegForm : InstrMapping {
+  let FilterClass = "ImmRegRel";
+  let RowFields = ["CextOpcode", "PredSense", "PNewValue"];
+  let ColFields = ["InputType"];
+  let KeyCol = ["imm"];
+  let ValueCols = [["reg"]];
+}
+
+//===----------------------------------------------------------------------===//
 // Register File, Calling Conv, Instruction Descriptions
 //===----------------------------------------------------------------------===//
 include "HexagonSchedule.td"
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index c15bce608f..cedb3a8303 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -14,23 +14,26 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "asm-printer"
-#include "Hexagon.h"
 #include "HexagonAsmPrinter.h"
-#include "HexagonMachineFunctionInfo.h"
+#include "Hexagon.h"
 #include "HexagonMCInst.h"
-#include "HexagonTargetMachine.h"
+#include "HexagonMachineFunctionInfo.h"
 #include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
 #include "InstPrinter/HexagonInstPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
@@ -38,22 +41,19 @@
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 9bca9e0707..d4078ad28b 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -7,21 +7,22 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "hexagon_cfg"
-#include "HexagonTargetMachine.h"
-#include "HexagonSubtarget.h"
+#include "Hexagon.h"
 #include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.cpp b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
index 73f9d9acab..1b4b0206f7 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.cpp
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.cpp
@@ -15,12 +15,12 @@
 
 #include "HexagonCallingConvLower.h"
 #include "Hexagon.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 Hexagon_CCState::Hexagon_CCState(CallingConv::ID CC, bool isVarArg,
diff --git a/lib/Target/Hexagon/HexagonCallingConvLower.h b/lib/Target/Hexagon/HexagonCallingConvLower.h
index 1f601e87ad..489b3a3e59 100644
--- a/lib/Target/Hexagon/HexagonCallingConvLower.h
+++ b/lib/Target/Hexagon/HexagonCallingConvLower.h
@@ -17,9 +17,9 @@
 #define LLVM_Hexagon_CODEGEN_CALLINGCONVLOWER_H
 
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
 
 //
 // Need to handle varargs.
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index ae2ca37888..08144217fd 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -17,9 +17,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "HexagonTargetMachine.h"
-#include "HexagonSubtarget.h"
+#include "Hexagon.h"
 #include "HexagonMachineFunctionInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -30,12 +31,12 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index cd682df7a5..29bfc6b466 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -11,28 +11,28 @@
 #include "HexagonFrameLowering.h"
 #include "Hexagon.h"
 #include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
 #include "HexagonTargetMachine.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/MC/MachineLocation.h"
+#include "llvm/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Type.h"
 
 using namespace llvm;
 
@@ -166,7 +166,8 @@ bool HexagonFrameLowering::hasTailCall(MachineBasicBlock &MBB) const {
   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
   unsigned RetOpcode = MBBI->getOpcode();
 
-  return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;}
+  return RetOpcode == Hexagon::TCRETURNtg || RetOpcode == Hexagon::TCRETURNtext;
+}
 
 void HexagonFrameLowering::emitEpilogue(MachineFunction &MF,
                                      MachineBasicBlock &MBB) const {
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index d756aec9be..d875fea575 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -29,18 +29,18 @@
 #define DEBUG_TYPE "hwloops"
 #include "Hexagon.h"
 #include "HexagonTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/PassSupport.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -461,6 +461,9 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
     return false;
   }
   MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
+  if (LastI == LastMBB->end()) {
+    return false;
+  }
 
   // Determine the loop start.
   MachineBasicBlock *LoopStart = L->getTopBlock();
@@ -478,6 +481,9 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
 
   // Convert the loop to a hardware loop
   DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
+  DebugLoc InsertPosDL;
+  if (InsertPos != Preheader->end())
+    InsertPosDL = InsertPos->getDebugLoc();
 
   if (TripCount->isReg()) {
     // Create a copy of the loop count register.
@@ -485,23 +491,23 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
     const TargetRegisterClass *RC =
       MF->getRegInfo().getRegClass(TripCount->getReg());
     unsigned CountReg = MF->getRegInfo().createVirtualRegister(RC);
-    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+    BuildMI(*Preheader, InsertPos, InsertPosDL,
             TII->get(TargetOpcode::COPY), CountReg).addReg(TripCount->getReg());
     if (TripCount->isNeg()) {
       unsigned CountReg1 = CountReg;
       CountReg = MF->getRegInfo().createVirtualRegister(RC);
-      BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+      BuildMI(*Preheader, InsertPos, InsertPosDL,
               TII->get(Hexagon::NEG), CountReg).addReg(CountReg1);
     }
 
     // Add the Loop instruction to the beginning of the loop.
-    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+    BuildMI(*Preheader, InsertPos, InsertPosDL,
             TII->get(Hexagon::LOOP0_r)).addMBB(LoopStart).addReg(CountReg);
   } else {
     assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
     // Add the Loop immediate instruction to the beginning of the loop.
     int64_t CountImm = TripCount->getImm();
-    BuildMI(*Preheader, InsertPos, InsertPos->getDebugLoc(),
+    BuildMI(*Preheader, InsertPos, InsertPosDL,
             TII->get(Hexagon::LOOP0_i)).addMBB(LoopStart).addImm(CountImm);
   }
 
@@ -514,8 +520,9 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
   BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
 
   // Replace the loop branch with an endloop instruction.
-  DebugLoc dl = LastI->getDebugLoc();
-  BuildMI(*LastMBB, LastI, dl, TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
+  DebugLoc LastIDL = LastI->getDebugLoc();
+  BuildMI(*LastMBB, LastI, LastIDL,
+          TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
 
   // The loop ends with either:
   //  - a conditional branch followed by an unconditional branch, or
@@ -530,7 +537,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
         TII->RemoveBranch(*LastMBB);
       }
       SmallVector<MachineOperand, 0> Cond;
-      TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, dl);
+      TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, LastIDL);
     }
   } else {
     // Conditional branch to loop start; just delete it.
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 5499134eb9..275e135cd6 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -12,10 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "hexagon-isel"
+#include "Hexagon.h"
 #include "HexagonISelLowering.h"
 #include "HexagonTargetMachine.h"
-#include "llvm/Intrinsics.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 
@@ -94,6 +95,7 @@ public:
   SDNode *SelectConstant(SDNode *N);
   SDNode *SelectConstantFP(SDNode *N);
   SDNode *SelectAdd(SDNode *N);
+  bool isConstExtProfitable(SDNode *N) const;
 
   // Include the pieces autogenerated from the target description.
 #include "HexagonGenDAGISel.inc"
@@ -1507,3 +1509,13 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
   OutOps.push_back(Op1);
   return false;
 }
+
+bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
+  unsigned UseCount = 0;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    UseCount++;
+  }
+
+  return (UseCount <= 1);
+
+}
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index aec1ed327f..10c54cba9f 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -13,16 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "HexagonISelLowering.h"
-#include "HexagonTargetMachine.h"
 #include "HexagonMachineFunctionInfo.h"
-#include "HexagonTargetObjectFile.h"
 #include "HexagonSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Intrinsics.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonTargetObjectFile.h"
 #include "llvm/CallingConv.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -32,6 +26,12 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index fe6c905adf..14254ce742 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -16,9 +16,9 @@
 #define Hexagon_ISELLOWERING_H
 
 #include "Hexagon.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/CallingConv.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
   namespace HexagonISD {
@@ -50,6 +50,15 @@ namespace llvm {
       BARRIER,     // Memory barrier.
       WrapperJT,
       WrapperCP,
+      WrapperCombineII,
+      WrapperCombineRR,
+      WrapperPackhl,
+      WrapperSplatB,
+      WrapperSplatH,
+      WrapperShuffEB,
+      WrapperShuffEH,
+      WrapperShuffOB,
+      WrapperShuffOH,
       TC_RETURN
     };
   }
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index c9e0025453..3b1ae098f8 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -12,16 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "HexagonInstrInfo.h"
+#include "Hexagon.h"
 #include "HexagonRegisterInfo.h"
 #include "HexagonSubtarget.h"
-#include "Hexagon.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Support/MathExtras.h"
 #define GET_INSTRINFO_CTOR
@@ -314,7 +314,7 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
     return;
   }
   if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
-    BuildMI(MBB, I, DL, get(Hexagon::TFR_64), DestReg).addReg(SrcReg);
+    BuildMI(MBB, I, DL, get(Hexagon::TFR64), DestReg).addReg(SrcReg);
     return;
   }
   if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 2bb53f899c..29e3eb10f5 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -16,8 +16,8 @@
 
 #include "HexagonRegisterInfo.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 
 #define GET_INSTRINFO_HEADER
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index ca21dbb4e1..dc93fb6b1b 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -14,94 +14,6 @@
 include "HexagonInstrFormats.td"
 include "HexagonOperands.td"
 
-//===----------------------------------------------------------------------===//
-// Classes used for relation maps.
-//===----------------------------------------------------------------------===//
-// PredRel - Filter class used to relate non-predicated instructions with their
-// predicated forms.
-class PredRel;
-// PredNewRel - Filter class used to relate predicated instructions with their
-// predicate-new forms.
-class PredNewRel: PredRel;
-// ImmRegRel - Filter class used to relate instructions having reg-reg form
-// with their reg-imm counterparts.
-class ImmRegRel;
-// NewValueRel - Filter class used to relate regular store instructions with
-// their new-value store form.
-class NewValueRel: PredNewRel;
-// NewValueRel - Filter class used to relate load/store instructions having
-// different addressing modes with each other.
-class AddrModeRel: NewValueRel;
-
-//===----------------------------------------------------------------------===//
-// Hexagon Instruction Predicate Definitions.
-//===----------------------------------------------------------------------===//
-def HasV2T                      : Predicate<"Subtarget.hasV2TOps()">;
-def HasV2TOnly                  : Predicate<"Subtarget.hasV2TOpsOnly()">;
-def NoV2T                       : Predicate<"!Subtarget.hasV2TOps()">;
-def HasV3T                      : Predicate<"Subtarget.hasV3TOps()">;
-def HasV3TOnly                  : Predicate<"Subtarget.hasV3TOpsOnly()">;
-def NoV3T                       : Predicate<"!Subtarget.hasV3TOps()">;
-def HasV4T                      : Predicate<"Subtarget.hasV4TOps()">;
-def NoV4T                       : Predicate<"!Subtarget.hasV4TOps()">;
-def HasV5T                      : Predicate<"Subtarget.hasV5TOps()">;
-def NoV5T                       : Predicate<"!Subtarget.hasV5TOps()">;
-def UseMEMOP                    : Predicate<"Subtarget.useMemOps()">;
-def IEEERndNearV5T              : Predicate<"Subtarget.modeIEEERndNear()">;
-
-// Addressing modes.
-def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
-def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
-def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
-def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
-def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
-def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
-def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
-def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
-def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
-
-// Address operands.
-def MEMrr : Operand<i32> {
-  let PrintMethod = "printMEMrrOperand";
-  let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-// Address operands
-def MEMri : Operand<i32> {
-  let PrintMethod = "printMEMriOperand";
-  let MIOperandInfo = (ops IntRegs, IntRegs);
-}
-
-def MEMri_s11_2 : Operand<i32>,
-  ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
-  let PrintMethod = "printMEMriOperand";
-  let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
-def FrameIndex : Operand<i32> {
-  let PrintMethod = "printFrameIndexOperand";
-  let MIOperandInfo = (ops IntRegs, s11Imm);
-}
-
-let PrintMethod = "printGlobalOperand" in
-  def globaladdress : Operand<i32>;
-
-let PrintMethod = "printJumpTable" in
- def jumptablebase : Operand<i32>;
-
-def brtarget : Operand<OtherVT>;
-def calltarget : Operand<i32>;
-
-def bblabel : Operand<i32>;
-def bbl   : SDNode<"ISD::BasicBlock", SDTPtrLeaf   , [], "BasicBlockSDNode">;
-
-def symbolHi32 : Operand<i32> {
-  let PrintMethod = "printSymbolHi";
-}
-def symbolLo32 : Operand<i32> {
-  let PrintMethod = "printSymbolLo";
-}
-
 // Multi-class for logical operators.
 multiclass ALU32_rr_ri<string OpcStr, SDNode OpNode> {
   def rr : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
@@ -129,40 +41,54 @@ multiclass CMP32_rr<string OpcStr, PatFrag OpNode> {
                        (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
 }
 
-multiclass CMP32_rr_ri_s10<string OpcStr, PatFrag OpNode> {
-  def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
-                 !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
-                 [(set (i1 PredRegs:$dst),
-                       (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
-  def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Imm:$c),
-                 !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
-                 [(set (i1 PredRegs:$dst),
-                       (OpNode (i32 IntRegs:$b), s10ImmPred:$c))]>;
+multiclass CMP32_rr_ri_s10<string OpcStr, string CextOp, PatFrag OpNode> {
+  let CextOpcode = CextOp in {
+    let InputType = "reg" in
+    def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                   !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+                   [(set (i1 PredRegs:$dst),
+                         (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
+
+    let isExtendable = 1, opExtendable = 2, isExtentSigned = 1,
+    opExtentBits = 10, InputType = "imm" in
+    def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s10Ext:$c),
+                   !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+                   [(set (i1 PredRegs:$dst),
+                         (OpNode (i32 IntRegs:$b), s10ExtPred:$c))]>;
+  }
 }
 
-multiclass CMP32_rr_ri_u9<string OpcStr, PatFrag OpNode> {
-  def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
-                 !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
-                 [(set (i1 PredRegs:$dst),
-                       (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
-  def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Imm:$c),
-                 !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
-                 [(set (i1 PredRegs:$dst),
-                       (OpNode (i32 IntRegs:$b), u9ImmPred:$c))]>;
+multiclass CMP32_rr_ri_u9<string OpcStr, string CextOp, PatFrag OpNode> {
+  let CextOpcode = CextOp in {
+    let InputType = "reg" in
+    def rr : ALU32_rr<(outs PredRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                   !strconcat("$dst = ", !strconcat(OpcStr, "($b, $c)")),
+                   [(set (i1 PredRegs:$dst),
+                         (OpNode (i32 IntRegs:$b), (i32 IntRegs:$c)))]>;
+
+    let isExtendable = 1, opExtendable = 2, isExtentSigned = 0,
+    opExtentBits = 9, InputType = "imm" in
+    def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u9Ext:$c),
+                   !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
+                   [(set (i1 PredRegs:$dst),
+                         (OpNode (i32 IntRegs:$b), u9ExtPred:$c))]>;
+  }
 }
 
 multiclass CMP32_ri_u8<string OpcStr, PatFrag OpNode> {
-  def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Imm:$c),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 8 in
+  def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, u8Ext:$c),
                  !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
                  [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
-                                                   u8ImmPred:$c))]>;
+                                                   u8ExtPred:$c))]>;
 }
 
 multiclass CMP32_ri_s8<string OpcStr, PatFrag OpNode> {
-  def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Imm:$c),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
+  def ri : ALU32_ri<(outs PredRegs:$dst), (ins IntRegs:$b, s8Ext:$c),
                  !strconcat("$dst = ", !strconcat(OpcStr, "($b, #$c)")),
                  [(set (i1 PredRegs:$dst), (OpNode (i32 IntRegs:$b),
-                                                   s8ImmPred:$c))]>;
+                                                   s8ExtPred:$c))]>;
 }
 }
 
@@ -220,7 +146,7 @@ defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel;
 multiclass ALU32ri_Pbase<string mnemonic, bit isNot, bit isPredNew> {
   let PNewValue = #!if(isPredNew, "new", "") in
   def #NAME# : ALU32_ri<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, s8Imm: $src3),
+            (ins PredRegs:$src1, IntRegs:$src2, s8Ext: $src3),
             !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ",
             ") $dst = ")#mnemonic#"($src2, #$src3)",
             []>;
@@ -234,17 +160,19 @@ multiclass ALU32ri_Pred<string mnemonic, bit PredNot> {
   }
 }
 
-let InputType = "imm" in
+let isExtendable = 1, InputType = "imm" in
 multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> {
   let CextOpcode = CextOp, BaseOpcode = CextOp#_ri in {
-    let isPredicable = 1 in
+    let opExtendable = 2, isExtentSigned = 1, opExtentBits = 16,
+    isPredicable = 1 in
     def #NAME# : ALU32_ri<(outs IntRegs:$dst),
-            (ins IntRegs:$src1, s16Imm:$src2),
+            (ins IntRegs:$src1, s16Ext:$src2),
             "$dst = "#mnemonic#"($src1, #$src2)",
             [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
-                                              (s16ImmPred:$src2)))]>;
+                                              (s16ExtPred:$src2)))]>;
 
-    let neverHasSideEffects = 1, isPredicated = 1 in {
+    let opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+    neverHasSideEffects = 1, isPredicated = 1 in {
       defm Pt : ALU32ri_Pred<mnemonic, 0>;
       defm NotPt : ALU32ri_Pred<mnemonic, 1>;
     }
@@ -253,23 +181,26 @@ multiclass ALU32ri_base<string mnemonic, string CextOp, SDNode OpNode> {
 
 defm ADD_ri : ALU32ri_base<"add", "ADD", add>, ImmRegRel, PredNewRel;
 
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
+CextOpcode = "OR", InputType = "imm" in
 def OR_ri : ALU32_ri<(outs IntRegs:$dst),
-            (ins IntRegs:$src1, s10Imm:$src2),
+            (ins IntRegs:$src1, s10Ext:$src2),
             "$dst = or($src1, #$src2)",
             [(set (i32 IntRegs:$dst), (or (i32 IntRegs:$src1),
-                                          s10ImmPred:$src2))]>;
+                                          s10ExtPred:$src2))]>, ImmRegRel;
 
 def NOT_rr : ALU32_rr<(outs IntRegs:$dst),
             (ins IntRegs:$src1),
             "$dst = not($src1)",
             [(set (i32 IntRegs:$dst), (not (i32 IntRegs:$src1)))]>;
 
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
+InputType = "imm", CextOpcode = "AND" in
 def AND_ri : ALU32_ri<(outs IntRegs:$dst),
-            (ins IntRegs:$src1, s10Imm:$src2),
+            (ins IntRegs:$src1, s10Ext:$src2),
             "$dst = and($src1, #$src2)",
             [(set (i32 IntRegs:$dst), (and (i32 IntRegs:$src1),
-                                           s10ImmPred:$src2))]>;
-
+                                           s10ExtPred:$src2))]>, ImmRegRel;
 // Negate.
 def NEG : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
           "$dst = neg($src1)",
@@ -281,27 +212,112 @@ def NOP : ALU32_rr<(outs), (ins),
           []>;
 
 // Rd32=sub(#s10,Rs32)
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 10,
+CextOpcode = "SUB", InputType = "imm" in
 def SUB_ri : ALU32_ri<(outs IntRegs:$dst),
-            (ins s10Imm:$src1, IntRegs:$src2),
+            (ins s10Ext:$src1, IntRegs:$src2),
             "$dst = sub(#$src1, $src2)",
-            [(set IntRegs:$dst, (sub s10ImmPred:$src1, IntRegs:$src2))]>;
-
-// Transfer immediate.
-let isMoveImm = 1, isReMaterializable = 1, isPredicable = 1 in
-def TFRI : ALU32_ri<(outs IntRegs:$dst), (ins s16Imm:$src1),
-           "$dst = #$src1",
-           [(set (i32 IntRegs:$dst), s16ImmPred:$src1)]>;
-
-// Transfer register.
-let neverHasSideEffects = 1, isPredicable = 1 in
-def TFR : ALU32_ri<(outs IntRegs:$dst), (ins IntRegs:$src1),
-          "$dst = $src1",
-          []>;
+            [(set IntRegs:$dst, (sub s10ExtPred:$src1, IntRegs:$src2))]>,
+            ImmRegRel;
 
-let neverHasSideEffects = 1, isPredicable = 1 in
-def TFR64 : ALU32_ri<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
-          "$dst = $src1",
-          []>;
+
+multiclass TFR_Pred<bit PredNot> {
+  let PredSense = #!if(PredNot, "false", "true") in {
+    def _c#NAME# : ALU32_rr<(outs IntRegs:$dst),
+                            (ins PredRegs:$src1, IntRegs:$src2),
+            !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2",
+            []>;
+    // Predicate new
+    let PNewValue = "new" in
+    def _cdn#NAME# : ALU32_rr<(outs IntRegs:$dst),
+                              (ins PredRegs:$src1, IntRegs:$src2),
+            !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2",
+            []>;
+  }
+}
+
+let InputType = "reg", neverHasSideEffects = 1 in
+multiclass TFR_base<string CextOp> {
+  let CextOpcode = CextOp, BaseOpcode = CextOp in {
+    let isPredicable = 1 in
+    def #NAME# : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
+            "$dst = $src1",
+            []>;
+
+    let  isPredicated = 1 in {
+      defm Pt : TFR_Pred<0>;
+      defm NotPt : TFR_Pred<1>;
+    }
+  }
+}
+
+multiclass TFR64_Pred<bit PredNot> {
+  let PredSense = #!if(PredNot, "false", "true") in {
+    def _c#NAME# : ALU32_rr<(outs DoubleRegs:$dst),
+                            (ins PredRegs:$src1, DoubleRegs:$src2),
+            !if(PredNot, "if (!$src1", "if ($src1")#") $dst = $src2",
+            []>;
+    // Predicate new
+    let PNewValue = "new" in
+    def _cdn#NAME# : ALU32_rr<(outs DoubleRegs:$dst),
+                              (ins PredRegs:$src1, DoubleRegs:$src2),
+            !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = $src2",
+            []>;
+  }
+}
+
+let InputType = "reg", neverHasSideEffects = 1 in
+multiclass TFR64_base<string CextOp> {
+  let CextOpcode = CextOp, BaseOpcode = CextOp in {
+    let isPredicable = 1 in
+    def #NAME# : ALU32_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
+            "$dst = $src1",
+            []>;
+
+    let  isPredicated = 1 in {
+      defm Pt : TFR64_Pred<0>;
+      defm NotPt : TFR64_Pred<1>;
+    }
+  }
+}
+
+
+multiclass TFRI_Pred<bit PredNot> {
+  let PredSense = #!if(PredNot, "false", "true") in {
+    def _c#NAME# : ALU32_ri<(outs IntRegs:$dst),
+                            (ins PredRegs:$src1, s12Ext:$src2),
+            !if(PredNot, "if (!$src1", "if ($src1")#") $dst = #$src2",
+            []>;
+
+    // Predicate new
+    let PNewValue = "new" in
+    def _cdn#NAME# : ALU32_rr<(outs IntRegs:$dst),
+                              (ins PredRegs:$src1, s12Ext:$src2),
+            !if(PredNot, "if (!$src1", "if ($src1")#".new) $dst = #$src2",
+            []>;
+  }
+}
+
+let InputType = "imm", isExtendable = 1, isExtentSigned = 1 in
+multiclass TFRI_base<string CextOp> {
+  let CextOpcode = CextOp, BaseOpcode = CextOp#I in {
+    let opExtendable = 1, opExtentBits = 16, isMoveImm = 1, isPredicable = 1,
+    isReMaterializable = 1 in
+    def #NAME# : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1),
+            "$dst = #$src1",
+            [(set (i32 IntRegs:$dst), s16ExtPred:$src1)]>;
+
+    let opExtendable = 2,  opExtentBits = 12, neverHasSideEffects = 1,
+    isPredicated = 1 in {
+      defm Pt    : TFRI_Pred<0>;
+      defm NotPt : TFRI_Pred<1>;
+    }
+  }
+}
+
+defm TFRI : TFRI_base<"TFR">, ImmRegRel, PredNewRel;
+defm TFR : TFR_base<"TFR">, ImmRegRel, PredNewRel;
+defm TFR64 : TFR64_base<"TFR64">, ImmRegRel, PredNewRel;
 
 // Transfer control register.
 let neverHasSideEffects = 1 in
@@ -318,17 +334,50 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1),
 //===----------------------------------------------------------------------===//
 
 // Combine.
-let isPredicable = 1, neverHasSideEffects = 1 in
-def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst),
-            (ins IntRegs:$src1, IntRegs:$src2),
-            "$dst = combine($src1, $src2)",
-            []>;
 
-let neverHasSideEffects = 1 in
-def COMBINE_ii : ALU32_ii<(outs DoubleRegs:$dst),
-            (ins s8Imm:$src1, s8Imm:$src2),
-            "$dst = combine(#$src1, #$src2)",
-            []>;
+def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
+  [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+
+def HexagonWrapperCombineII :
+  SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>;
+def HexagonWrapperCombineRR :
+  SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>;
+
+// Combines the two integer registers SRC1 and SRC2 into a double register.
+let isPredicable = 1 in
+def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1,
+                                                       IntRegs:$src2),
+  "$dst = combine($src1, $src2)",
+  [(set (i64 DoubleRegs:$dst),
+        (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1),
+                                      (i32 IntRegs:$src2))))]>;
+
+// Rd=combine(Rt.[HL], Rs.[HL])
+class COMBINE_halves<string A, string B>: ALU32_rr<(outs IntRegs:$dst),
+                                                   (ins IntRegs:$src1,
+                                                        IntRegs:$src2),
+  "$dst = combine($src1."# A #", $src2."# B #")", []>;
+
+let isPredicable = 1 in {
+  def COMBINE_hh : COMBINE_halves<"H", "H">;
+  def COMBINE_hl : COMBINE_halves<"H", "L">;
+  def COMBINE_lh : COMBINE_halves<"L", "H">;
+  def COMBINE_ll : COMBINE_halves<"L", "L">;
+}
+
+def : Pat<(i32 (trunc (i64 (srl (i64 DoubleRegs:$a), (i32 16))))),
+  (COMBINE_lh (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_hireg),
+              (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_loreg))>;
+
+// Combines the two immediates SRC1 and SRC2 into a double register.
+class COMBINE_imm<Operand imm1, Operand imm2, PatLeaf pat1, PatLeaf pat2> :
+  ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2),
+  "$dst = combine(#$src1, #$src2)",
+  [(set (i64 DoubleRegs:$dst),
+        (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>;
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in
+def COMBINE_Ii : COMBINE_imm<s8Ext, s8Imm, s8ExtPred, s8ImmPred>;
 
 // Mux.
 def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
@@ -337,32 +386,38 @@ def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
             "$dst = vmux($src1, $src2, $src3)",
             []>;
 
+let CextOpcode = "MUX", InputType = "reg" in
 def MUX_rr : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
                                             IntRegs:$src2, IntRegs:$src3),
              "$dst = mux($src1, $src2, $src3)",
-             [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
-                                                    (i32 IntRegs:$src2),
-                                                    (i32 IntRegs:$src3))))]>;
+             [(set (i32 IntRegs:$dst),
+                   (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
+                                (i32 IntRegs:$src3))))]>, ImmRegRel;
 
-def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
+CextOpcode = "MUX", InputType = "imm" in
+def MUX_ir : ALU32_ir<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2,
                                                 IntRegs:$src3),
              "$dst = mux($src1, #$src2, $src3)",
-             [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
-                                                    s8ImmPred:$src2,
-                                                    (i32 IntRegs:$src3))))]>;
+             [(set (i32 IntRegs:$dst),
+                   (i32 (select (i1 PredRegs:$src1), s8ExtPred:$src2,
+                                (i32 IntRegs:$src3))))]>, ImmRegRel;
 
+let isExtendable = 1, opExtendable = 3, isExtentSigned = 1, opExtentBits = 8,
+CextOpcode = "MUX", InputType = "imm" in
 def MUX_ri : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2,
-                                                s8Imm:$src3),
+                                                s8Ext:$src3),
              "$dst = mux($src1, $src2, #$src3)",
-             [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
-                                                    (i32 IntRegs:$src2),
-                                                    s8ImmPred:$src3)))]>;
+             [(set (i32 IntRegs:$dst),
+                   (i32 (select (i1 PredRegs:$src1), (i32 IntRegs:$src2),
+                                 s8ExtPred:$src3)))]>, ImmRegRel;
 
-def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Imm:$src2,
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in
+def MUX_ii : ALU32_ii<(outs IntRegs:$dst), (ins PredRegs:$src1, s8Ext:$src2,
                                                 s8Imm:$src3),
              "$dst = mux($src1, #$src2, #$src3)",
              [(set (i32 IntRegs:$dst), (i32 (select (i1 PredRegs:$src1),
-                                                    s8ImmPred:$src2,
+                                                    s8ExtPred:$src2,
                                                     s8ImmPred:$src3)))]>;
 
 // Shift halfword.
@@ -407,7 +462,6 @@ def ZXTH : ALU32_rr<(outs IntRegs:$dst), (ins IntRegs:$src1),
 //===----------------------------------------------------------------------===//
 
 // Conditional combine.
-
 let neverHasSideEffects = 1, isPredicated = 1 in
 def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst),
             (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3),
@@ -432,73 +486,41 @@ def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst),
             "if (!$src1.new) $dst = combine($src2, $src3)",
             []>;
 
-// Conditional transfer.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR_cPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1, IntRegs:$src2),
-              "if ($src1) $dst = $src2",
-              []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR_cNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
-                                                    IntRegs:$src2),
-                 "if (!$src1) $dst = $src2",
-                 []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR64_cPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
-                                                   DoubleRegs:$src2),
-              "if ($src1) $dst = $src2",
-              []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR64_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1,
-                                                    DoubleRegs:$src2),
-                 "if (!$src1) $dst = $src2",
-                 []>;
+// Compare.
+defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel;
+defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel;
+defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
+defm CMPLTU : CMP32_rr<"cmp.ltu", setult>;
+defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", "CMPEQ", seteq>, ImmRegRel;
+defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
+defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>;
 
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFRI_cPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, s12Imm:$src2),
-               "if ($src1) $dst = #$src2",
-               []>;
+def CTLZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+    "$dst = cl0($src1)",
+    [(set (i32 IntRegs:$dst), (ctlz (i32 IntRegs:$src1)))]>;
 
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFRI_cNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
-                                                     s12Imm:$src2),
-                  "if (!$src1) $dst = #$src2",
-                  []>;
+def CTTZ_rr : SInst<(outs IntRegs:$dst), (ins IntRegs:$src1),
+    "$dst = ct0($src1)",
+    [(set (i32 IntRegs:$dst), (cttz (i32 IntRegs:$src1)))]>;
 
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR_cdnPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
-                                                   IntRegs:$src2),
-                "if ($src1.new) $dst = $src2",
-                []>;
+def CTLZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+    "$dst = cl0($src1)",
+    [(set (i32 IntRegs:$dst), (i32 (trunc (ctlz (i64 DoubleRegs:$src1)))))]>;
 
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFR_cdnNotPt : ALU32_rr<(outs IntRegs:$dst), (ins PredRegs:$src1,
-                                                      IntRegs:$src2),
-                   "if (!$src1.new) $dst = $src2",
-                   []>;
+def CTTZ64_rr : SInst<(outs IntRegs:$dst), (ins DoubleRegs:$src1),
+    "$dst = ct0($src1)",
+    [(set (i32 IntRegs:$dst), (i32 (trunc (cttz (i64 DoubleRegs:$src1)))))]>;
 
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFRI_cdnPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
-                                                    s12Imm:$src2),
-                 "if ($src1.new) $dst = #$src2",
-                 []>;
+def TSTBIT_rr : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, IntRegs:$src2),
+    "$dst = tstbit($src1, $src2)",
+    [(set (i1 PredRegs:$dst),
+          (setne (and (shl 1, (i32 IntRegs:$src2)), (i32 IntRegs:$src1)), 0))]>;
 
-let neverHasSideEffects = 1, isPredicated = 1 in
-def TFRI_cdnNotPt : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1,
-                                                       s12Imm:$src2),
-                    "if (!$src1.new) $dst = #$src2",
-                    []>;
+def TSTBIT_ri : SInst<(outs PredRegs:$dst), (ins IntRegs:$src1, u5Imm:$src2),
+    "$dst = tstbit($src1, $src2)",
+    [(set (i1 PredRegs:$dst),
+          (setne (and (shl 1, (u5ImmPred:$src2)), (i32 IntRegs:$src1)), 0))]>;
 
-// Compare.
-defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", setugt>;
-defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", setgt>;
-defm CMPLT : CMP32_rr<"cmp.lt", setlt>;
-defm CMPLTU : CMP32_rr<"cmp.ltu", setult>;
-defm CMPEQ : CMP32_rr_ri_s10<"cmp.eq", seteq>;
-defm CMPGE : CMP32_ri_s8<"cmp.ge", setge>;
-defm CMPGEU : CMP32_ri_u8<"cmp.geu", setuge>;
 //===----------------------------------------------------------------------===//
 // ALU32/PRED -
 //===----------------------------------------------------------------------===//
@@ -615,11 +637,6 @@ def SUB64_rr : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1,
 
 // Subtract halfword.
 
-// Transfer register.
-let neverHasSideEffects = 1 in
-def TFR_64 : ALU64_rr<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1),
-             "$dst = $src1",
-             []>;
 //===----------------------------------------------------------------------===//
 // ALU64/ALU -
 //===----------------------------------------------------------------------===//
@@ -825,21 +842,146 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, isPredicated = 1,
 // LD +
 //===----------------------------------------------------------------------===//
 ///
-///
-// Load doubleword.
-let isPredicable = 1 in
-def LDrid : LDInst<(outs DoubleRegs:$dst),
-            (ins MEMri:$addr),
-            "$dst = memd($addr)",
-            [(set (i64 DoubleRegs:$dst), (i64 (load ADDRriS11_3:$addr)))]>;
+// Load -- MEMri operand
+multiclass LD_MEMri_Pbase<string mnemonic, RegisterClass RC,
+                          bit isNot, bit isPredNew> {
+  let PNewValue = #!if(isPredNew, "new", "") in
+  def #NAME# : LDInst2<(outs RC:$dst),
+                       (ins PredRegs:$src1, MEMri:$addr),
+            #!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+            ") ")#"$dst = "#mnemonic#"($addr)",
+            []>;
+}
 
-let isPredicable = 1, AddedComplexity = 20 in
-def LDrid_indexed : LDInst<(outs DoubleRegs:$dst),
-            (ins IntRegs:$src1, s11_3Imm:$offset),
-            "$dst = memd($src1+#$offset)",
-            [(set (i64 DoubleRegs:$dst),
-                  (i64 (load (add (i32 IntRegs:$src1),
-                                  s11_3ImmPred:$offset))))]>;
+multiclass LD_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+  let PredSense = #!if(PredNot, "false", "true") in {
+    defm _c#NAME# : LD_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
+    // Predicate new
+    defm _cdn#NAME# : LD_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
+  }
+}
+
+let isExtendable = 1, neverHasSideEffects = 1 in
+multiclass LD_MEMri<string mnemonic, string CextOp, RegisterClass RC,
+                    bits<5> ImmBits, bits<5> PredImmBits> {
+
+  let CextOpcode = CextOp, BaseOpcode = CextOp in {
+    let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits,
+        isPredicable = 1 in
+      def #NAME# : LDInst2<(outs RC:$dst), (ins MEMri:$addr),
+                   "$dst = "#mnemonic#"($addr)",
+                   []>;
+
+    let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits,
+        isPredicated = 1 in {
+      defm Pt : LD_MEMri_Pred<mnemonic, RC, 0 >;
+      defm NotPt : LD_MEMri_Pred<mnemonic, RC, 1 >;
+    }
+  }
+}
+
+let addrMode = BaseImmOffset, isMEMri = "true" in {
+  defm LDrib: LD_MEMri < "memb", "LDrib", IntRegs, 11, 6>, AddrModeRel;
+  defm LDriub: LD_MEMri < "memub" , "LDriub", IntRegs, 11, 6>, AddrModeRel;
+  defm LDrih: LD_MEMri < "memh", "LDrih", IntRegs, 12, 7>, AddrModeRel;
+  defm LDriuh: LD_MEMri < "memuh", "LDriuh", IntRegs, 12, 7>, AddrModeRel;
+  defm LDriw: LD_MEMri < "memw", "LDriw", IntRegs, 13, 8>, AddrModeRel;
+  defm LDrid: LD_MEMri < "memd", "LDrid", DoubleRegs, 14, 9>, AddrModeRel;
+}
+
+def : Pat < (i32 (sextloadi8 ADDRriS11_0:$addr)),
+            (LDrib ADDRriS11_0:$addr) >;
+
+def : Pat < (i32 (zextloadi8 ADDRriS11_0:$addr)),
+            (LDriub ADDRriS11_0:$addr) >;
+
+def : Pat < (i32 (sextloadi16 ADDRriS11_1:$addr)),
+            (LDrih ADDRriS11_1:$addr) >;
+
+def : Pat < (i32 (zextloadi16 ADDRriS11_1:$addr)),
+            (LDriuh ADDRriS11_1:$addr) >;
+
+def : Pat < (i32 (load ADDRriS11_2:$addr)),
+            (LDriw ADDRriS11_2:$addr) >;
+
+def : Pat < (i64 (load ADDRriS11_3:$addr)),
+            (LDrid ADDRriS11_3:$addr) >;
+
+
+// Load - Base with Immediate offset addressing mode
+multiclass LD_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
+                        bit isNot, bit isPredNew> {
+  let PNewValue = #!if(isPredNew, "new", "") in
+  def #NAME# : LDInst2<(outs RC:$dst),
+                       (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3),
+            #!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+            ") ")#"$dst = "#mnemonic#"($src2+#$src3)",
+            []>;
+}
+
+multiclass LD_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
+                        bit PredNot> {
+  let PredSense = #!if(PredNot, "false", "true") in {
+    defm _c#NAME# : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
+    // Predicate new
+    defm _cdn#NAME# : LD_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
+  }
+}
+
+let isExtendable = 1, neverHasSideEffects = 1 in
+multiclass LD_Idxd<string mnemonic, string CextOp, RegisterClass RC,
+                   Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
+                   bits<5> PredImmBits> {
+
+  let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+    let opExtendable = 2, isExtentSigned = 1, opExtentBits = ImmBits,
+        isPredicable = 1, AddedComplexity = 20 in
+      def #NAME# : LDInst2<(outs RC:$dst), (ins IntRegs:$src1, ImmOp:$offset),
+                   "$dst = "#mnemonic#"($src1+#$offset)",
+                   []>;
+
+    let opExtendable = 3, isExtentSigned = 0, opExtentBits = PredImmBits,
+        isPredicated = 1 in {
+      defm Pt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 0 >;
+      defm NotPt : LD_Idxd_Pred<mnemonic, RC, predImmOp, 1 >;
+    }
+  }
+}
+
+let addrMode = BaseImmOffset in {
+  defm LDrib_indexed: LD_Idxd <"memb", "LDrib", IntRegs, s11_0Ext, u6_0Ext,
+                               11, 6>, AddrModeRel;
+  defm LDriub_indexed: LD_Idxd <"memub" , "LDriub", IntRegs, s11_0Ext, u6_0Ext,
+                                11, 6>, AddrModeRel;
+  defm LDrih_indexed: LD_Idxd <"memh", "LDrih", IntRegs, s11_1Ext, u6_1Ext,
+                               12, 7>, AddrModeRel;
+  defm LDriuh_indexed: LD_Idxd <"memuh", "LDriuh", IntRegs, s11_1Ext, u6_1Ext,
+                                12, 7>, AddrModeRel;
+  defm LDriw_indexed: LD_Idxd <"memw", "LDriw", IntRegs, s11_2Ext, u6_2Ext,
+                               13, 8>, AddrModeRel;
+  defm LDrid_indexed: LD_Idxd <"memd", "LDrid", DoubleRegs, s11_3Ext, u6_3Ext,
+                               14, 9>, AddrModeRel;
+}
+
+let AddedComplexity = 20 in {
+def : Pat < (i32 (sextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
+            (LDrib_indexed IntRegs:$src1, s11_0ExtPred:$offset) >;
+
+def : Pat < (i32 (zextloadi8 (add IntRegs:$src1, s11_0ExtPred:$offset))),
+            (LDriub_indexed IntRegs:$src1, s11_0ExtPred:$offset) >;
+
+def : Pat < (i32 (sextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
+            (LDrih_indexed IntRegs:$src1, s11_1ExtPred:$offset) >;
+
+def : Pat < (i32 (zextloadi16 (add IntRegs:$src1, s11_1ExtPred:$offset))),
+            (LDriuh_indexed IntRegs:$src1, s11_1ExtPred:$offset) >;
+
+def : Pat < (i32 (load (add IntRegs:$src1, s11_2ExtPred:$offset))),
+            (LDriw_indexed IntRegs:$src1, s11_2ExtPred:$offset) >;
+
+def : Pat < (i64 (load (add IntRegs:$src1, s11_3ExtPred:$offset))),
+            (LDrid_indexed IntRegs:$src1, s11_3ExtPred:$offset) >;
+}
 
 let neverHasSideEffects = 1 in
 def LDrid_GP : LDInst2<(outs DoubleRegs:$dst),
@@ -915,77 +1057,10 @@ let hasCtrlDep = 1, neverHasSideEffects = 1 in {
                     PredNewRel;
 }
 
-// Load doubleword conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_cPt : LDInst2<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if ($src1) $dst = memd($addr)",
-            []>;
-
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_cNotPt : LDInst2<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if (!$src1) $dst = memd($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_indexed_cPt : LDInst2<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
-            "if ($src1) $dst = memd($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_indexed_cNotPt : LDInst2<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
-            "if (!$src1) $dst = memd($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_cdnPt : LDInst2<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if ($src1.new) $dst = memd($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_cdnNotPt : LDInst2<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if (!$src1.new) $dst = memd($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_indexed_cdnPt : LDInst2<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
-            "if ($src1.new) $dst = memd($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrid_indexed_cdnNotPt : LDInst2<(outs DoubleRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3),
-            "if (!$src1.new) $dst = memd($src2+#$src3)",
-            []>;
-
-
-// Load byte.
-let isPredicable = 1 in
-def LDrib : LDInst<(outs IntRegs:$dst),
-            (ins MEMri:$addr),
-            "$dst = memb($addr)",
-            [(set (i32 IntRegs:$dst), (i32 (sextloadi8 ADDRriS11_0:$addr)))]>;
-
 // Load byte any-extend.
 def : Pat < (i32 (extloadi8 ADDRriS11_0:$addr)),
             (i32 (LDrib ADDRriS11_0:$addr)) >;
 
-// Indexed load byte.
-let isPredicable = 1, AddedComplexity = 20 in
-def LDrib_indexed : LDInst<(outs IntRegs:$dst),
-            (ins IntRegs:$src1, s11_0Imm:$offset),
-            "$dst = memb($src1+#$offset)",
-            [(set (i32 IntRegs:$dst),
-                  (i32 (sextloadi8 (add (i32 IntRegs:$src1),
-                                        s11_0ImmPred:$offset))))]>;
-
 // Indexed load byte any-extend.
 let AddedComplexity = 20 in
 def : Pat < (i32 (extloadi8 (add IntRegs:$src1, s11_0ImmPred:$offset))),
@@ -1012,71 +1087,6 @@ def LDub_GP : LDInst2<(outs IntRegs:$dst),
             []>,
             Requires<[NoV4T]>;
 
-// Load byte conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_cPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if ($src1) $dst = memb($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_cNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if (!$src1) $dst = memb($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_indexed_cPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
-            "if ($src1) $dst = memb($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
-            "if (!$src1) $dst = memb($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_cdnPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if ($src1.new) $dst = memb($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_cdnNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if (!$src1.new) $dst = memb($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
-            "if ($src1.new) $dst = memb($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrib_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
-            "if (!$src1.new) $dst = memb($src2+#$src3)",
-            []>;
-
-
-// Load halfword.
-let isPredicable = 1 in
-def LDrih : LDInst<(outs IntRegs:$dst),
-            (ins MEMri:$addr),
-            "$dst = memh($addr)",
-            [(set (i32 IntRegs:$dst), (i32 (sextloadi16 ADDRriS11_1:$addr)))]>;
-
-let isPredicable = 1, AddedComplexity = 20 in
-def LDrih_indexed : LDInst<(outs IntRegs:$dst),
-            (ins IntRegs:$src1, s11_1Imm:$offset),
-            "$dst = memh($src1+#$offset)",
-            [(set (i32 IntRegs:$dst),
-                  (i32 (sextloadi16 (add (i32 IntRegs:$src1),
-                                         s11_1ImmPred:$offset))))]>;
-
 def : Pat < (i32 (extloadi16 ADDRriS11_1:$addr)),
             (i32 (LDrih ADDRriS11_1:$addr))>;
 
@@ -1105,73 +1115,9 @@ def LDuh_GP : LDInst2<(outs IntRegs:$dst),
             []>,
             Requires<[NoV4T]>;
 
-// Load halfword conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_cPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if ($src1) $dst = memh($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_cNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if (!$src1) $dst = memh($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_indexed_cPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
-            "if ($src1) $dst = memh($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
-            "if (!$src1) $dst = memh($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_cdnPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if ($src1.new) $dst = memh($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_cdnNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if (!$src1.new) $dst = memh($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
-            "if ($src1.new) $dst = memh($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDrih_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
-            "if (!$src1.new) $dst = memh($src2+#$src3)",
-            []>;
-
-// Load unsigned byte.
-let isPredicable = 1 in
-def LDriub : LDInst<(outs IntRegs:$dst),
-            (ins MEMri:$addr),
-            "$dst = memub($addr)",
-            [(set (i32 IntRegs:$dst), (i32 (zextloadi8 ADDRriS11_0:$addr)))]>;
-
 def : Pat < (i32 (zextloadi1 ADDRriS11_0:$addr)),
             (i32 (LDriub ADDRriS11_0:$addr))>;
 
-let isPredicable = 1, AddedComplexity = 20 in
-def LDriub_indexed : LDInst<(outs IntRegs:$dst),
-            (ins IntRegs:$src1, s11_0Imm:$offset),
-            "$dst = memub($src1+#$offset)",
-            [(set (i32 IntRegs:$dst),
-                  (i32 (zextloadi8 (add (i32 IntRegs:$src1),
-                                        s11_0ImmPred:$offset))))]>;
-
 let AddedComplexity = 20 in
 def : Pat < (i32 (zextloadi1 (add IntRegs:$src1, s11_0ImmPred:$offset))),
             (i32 (LDriub_indexed IntRegs:$src1, s11_0ImmPred:$offset))>;
@@ -1183,71 +1129,7 @@ def LDriub_GP : LDInst2<(outs IntRegs:$dst),
             []>,
             Requires<[NoV4T]>;
 
-// Load unsigned byte conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_cPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if ($src1) $dst = memub($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_cNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if (!$src1) $dst = memub($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_indexed_cPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
-            "if ($src1) $dst = memub($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
-            "if (!$src1) $dst = memub($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_cdnPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if ($src1.new) $dst = memub($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_cdnNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if (!$src1.new) $dst = memub($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
-            "if ($src1.new) $dst = memub($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriub_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3),
-            "if (!$src1.new) $dst = memub($src2+#$src3)",
-            []>;
-
 // Load unsigned halfword.
-let isPredicable = 1 in
-def LDriuh : LDInst<(outs IntRegs:$dst),
-            (ins MEMri:$addr),
-            "$dst = memuh($addr)",
-            [(set (i32 IntRegs:$dst), (i32 (zextloadi16 ADDRriS11_1:$addr)))]>;
-
-// Indexed load unsigned halfword.
-let isPredicable = 1, AddedComplexity = 20 in
-def LDriuh_indexed : LDInst<(outs IntRegs:$dst),
-            (ins IntRegs:$src1, s11_1Imm:$offset),
-            "$dst = memuh($src1+#$offset)",
-            [(set (i32 IntRegs:$dst),
-                  (i32 (zextloadi16 (add (i32 IntRegs:$src1),
-                                         s11_1ImmPred:$offset))))]>;
-
 let neverHasSideEffects = 1 in
 def LDriuh_GP : LDInst2<(outs IntRegs:$dst),
             (ins globaladdress:$global, u16Imm:$offset),
@@ -1255,62 +1137,6 @@ def LDriuh_GP : LDInst2<(outs IntRegs:$dst),
             []>,
             Requires<[NoV4T]>;
 
-// Load unsigned halfword conditionally.
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_cPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if ($src1) $dst = memuh($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_cNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if (!$src1) $dst = memuh($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_indexed_cPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
-            "if ($src1) $dst = memuh($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
-            "if (!$src1) $dst = memuh($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_cdnPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if ($src1.new) $dst = memuh($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_cdnNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if (!$src1.new) $dst = memuh($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
-            "if ($src1.new) $dst = memuh($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriuh_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3),
-            "if (!$src1.new) $dst = memuh($src2+#$src3)",
-            []>;
-
-
-// Load word.
-let isPredicable = 1 in
-def LDriw : LDInst<(outs IntRegs:$dst),
-            (ins MEMri:$addr), "$dst = memw($addr)",
-            [(set IntRegs:$dst, (i32 (load ADDRriS11_2:$addr)))]>;
-
 // Load predicate.
 let Defs = [R10,R11,D5], neverHasSideEffects = 1 in
 def LDriw_pred : LDInst<(outs PredRegs:$dst),
@@ -1319,13 +1145,6 @@ def LDriw_pred : LDInst<(outs PredRegs:$dst),
             []>;
 
 // Indexed load.
-let isPredicable = 1, AddedComplexity = 20 in
-def LDriw_indexed : LDInst<(outs IntRegs:$dst),
-            (ins IntRegs:$src1, s11_2Imm:$offset),
-            "$dst = memw($src1+#$offset)",
-            [(set IntRegs:$dst, (i32 (load (add IntRegs:$src1,
-                                           s11_2ImmPred:$offset))))]>;
-
 let neverHasSideEffects = 1 in
 def LDriw_GP : LDInst2<(outs IntRegs:$dst),
             (ins globaladdress:$global, u16Imm:$offset),
@@ -1340,56 +1159,6 @@ def LDw_GP : LDInst2<(outs IntRegs:$dst),
             []>,
             Requires<[NoV4T]>;
 
-// Load word conditionally.
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_cPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if ($src1) $dst = memw($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_cNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if (!$src1) $dst = memw($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_indexed_cPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
-            "if ($src1) $dst = memw($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_indexed_cNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
-            "if (!$src1) $dst = memw($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_cdnPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if ($src1.new) $dst = memw($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_cdnNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, MEMri:$addr),
-            "if (!$src1.new) $dst = memw($addr)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_indexed_cdnPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
-            "if ($src1.new) $dst = memw($src2+#$src3)",
-            []>;
-
-let neverHasSideEffects = 1, isPredicated = 1 in
-def LDriw_indexed_cdnNotPt : LDInst2<(outs IntRegs:$dst),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3),
-            "if (!$src1.new) $dst = memw($src2+#$src3)",
-            []>;
-
 // Deallocate stack frame.
 let Defs = [R29, R30, R31], Uses = [R29], neverHasSideEffects = 1 in {
   def DEALLOCFRAME : LDInst2<(outs), (ins i32imm:$amt1),
@@ -1616,19 +1385,6 @@ def SUBri_acc : MInst_acc<(outs IntRegs: $dst), (ins IntRegs:$src1,
 ///    last operand.
 ///
 // Store doubleword.
-let isPredicable = 1 in
-def STrid : STInst<(outs),
-            (ins MEMri:$addr, DoubleRegs:$src1),
-            "memd($addr) = $src1",
-            [(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr)]>;
-
-// Indexed store double word.
-let AddedComplexity = 10, isPredicable = 1 in
-def STrid_indexed : STInst<(outs),
-            (ins IntRegs:$src1, s11_3Imm:$src2,  DoubleRegs:$src3),
-            "memd($src1+#$src2) = $src3",
-            [(store (i64 DoubleRegs:$src3),
-                    (add (i32 IntRegs:$src1), s11_3ImmPred:$src2))]>;
 
 let neverHasSideEffects = 1 in
 def STrid_GP : STInst2<(outs),
@@ -1653,42 +1409,6 @@ def POST_STdri : STInstPI<(outs IntRegs:$dst),
                         s4_3ImmPred:$offset))],
             "$src2 = $dst">;
 
-// Store doubleword conditionally.
-// if ([!]Pv) memd(Rs+#u6:3)=Rtt
-// if (Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrid_cPt : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
-            "if ($src1) memd($addr) = $src2",
-            []>;
-
-// if (!Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrid_cNotPt : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
-            "if (!$src1) memd($addr) = $src2",
-            []>;
-
-// if (Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrid_indexed_cPt : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
-                 DoubleRegs:$src4),
-            "if ($src1) memd($src2+#$src3) = $src4",
-            []>;
-
-// if (!Pv) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrid_indexed_cNotPt : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
-                 DoubleRegs:$src4),
-            "if (!$src1) memd($src2+#$src3) = $src4",
-            []>;
-
 // if ([!]Pv) memd(Rx++#s4:3)=Rtt
 // if (Pv) memd(Rx++#s4:3)=Rtt
 let AddedComplexity = 10, neverHasSideEffects = 1,
@@ -1710,21 +1430,149 @@ def POST_STdri_cNotPt : STInst2PI<(outs IntRegs:$dst),
             [],
             "$src3 = $dst">;
 
+//===----------------------------------------------------------------------===//
+// multiclass for the store instructions with MEMri operand.
+//===----------------------------------------------------------------------===//
+multiclass ST_MEMri_Pbase<string mnemonic, RegisterClass RC, bit isNot,
+                          bit isPredNew> {
+  let PNewValue = #!if(isPredNew, "new", "") in
+  def #NAME# : STInst2<(outs),
+            (ins PredRegs:$src1, MEMri:$addr, RC: $src2),
+            !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+            ") ")#mnemonic#"($addr) = $src2",
+            []>;
+}
 
-// Store byte.
-// memb(Rs+#s11:0)=Rt
-let isPredicable = 1 in
-def STrib : STInst<(outs),
-            (ins MEMri:$addr, IntRegs:$src1),
-            "memb($addr) = $src1",
-            [(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr)]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def STrib_indexed : STInst<(outs),
-            (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
-            "memb($src1+#$src2) = $src3",
-            [(truncstorei8 (i32 IntRegs:$src3), (add (i32 IntRegs:$src1),
-                                                     s11_0ImmPred:$src2))]>;
+multiclass ST_MEMri_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+  let PredSense = #!if(PredNot, "false", "true") in {
+    defm _c#NAME# : ST_MEMri_Pbase<mnemonic, RC, PredNot, 0>;
+
+    // Predicate new
+    let validSubTargets = HasV4SubT, Predicates = [HasV4T] in
+    defm _cdn#NAME#_V4 : ST_MEMri_Pbase<mnemonic, RC, PredNot, 1>;
+  }
+}
+
+let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_MEMri<string mnemonic, string CextOp, RegisterClass RC,
+                    bits<5> ImmBits, bits<5> PredImmBits> {
+
+  let CextOpcode = CextOp, BaseOpcode = CextOp in {
+    let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+         isPredicable = 1 in
+    def #NAME# : STInst2<(outs),
+            (ins MEMri:$addr, RC:$src),
+            #mnemonic#"($addr) = $src",
+            []>;
+
+    let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
+        isPredicated = 1 in {
+      defm Pt : ST_MEMri_Pred<mnemonic, RC, 0>;
+      defm NotPt : ST_MEMri_Pred<mnemonic, RC, 1>;
+    }
+  }
+}
+
+let addrMode = BaseImmOffset, isMEMri = "true" in {
+  defm STrib: ST_MEMri < "memb", "STrib", IntRegs, 11, 6>, AddrModeRel;
+  defm STrih: ST_MEMri < "memh", "STrih", IntRegs, 12, 7>, AddrModeRel;
+  defm STriw: ST_MEMri < "memw", "STriw", IntRegs, 13, 8>, AddrModeRel;
+
+  let isNVStorable = 0 in
+  defm STrid: ST_MEMri < "memd", "STrid", DoubleRegs, 14, 9>, AddrModeRel;
+}
+
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), ADDRriS11_0:$addr),
+          (STrib ADDRriS11_0:$addr, (i32 IntRegs:$src1))>;
+
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr),
+          (STrih ADDRriS11_1:$addr, (i32 IntRegs:$src1))>;
+
+def : Pat<(store (i32 IntRegs:$src1), ADDRriS11_2:$addr),
+          (STriw ADDRriS11_2:$addr, (i32 IntRegs:$src1))>;
+
+def : Pat<(store (i64 DoubleRegs:$src1), ADDRriS11_3:$addr),
+          (STrid ADDRriS11_3:$addr, (i64 DoubleRegs:$src1))>;
+
+
+//===----------------------------------------------------------------------===//
+// multiclass for the store instructions with base+immediate offset
+// addressing mode
+//===----------------------------------------------------------------------===//
+multiclass ST_Idxd_Pbase<string mnemonic, RegisterClass RC, Operand predImmOp,
+                        bit isNot, bit isPredNew> {
+  let PNewValue = #!if(isPredNew, "new", "") in
+  def #NAME# : STInst2<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
+            !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+            ") ")#mnemonic#"($src2+#$src3) = $src4",
+            []>;
+}
+
+multiclass ST_Idxd_Pred<string mnemonic, RegisterClass RC, Operand predImmOp,
+                        bit PredNot> {
+  let PredSense = #!if(PredNot, "false", "true"), isPredicated = 1 in {
+    defm _c#NAME# : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 0>;
+
+    // Predicate new
+    let validSubTargets = HasV4SubT, Predicates = [HasV4T] in
+    defm _cdn#NAME#_V4 : ST_Idxd_Pbase<mnemonic, RC, predImmOp, PredNot, 1>;
+  }
+}
+
+let isExtendable = 1, isNVStorable = 1, neverHasSideEffects = 1 in
+multiclass ST_Idxd<string mnemonic, string CextOp, RegisterClass RC,
+                   Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
+                   bits<5> PredImmBits> {
+
+  let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+    let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+         isPredicable = 1 in
+    def #NAME# : STInst2<(outs),
+            (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+            #mnemonic#"($src1+#$src2) = $src3",
+            []>;
+
+    let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits in {
+      defm Pt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 0>;
+      defm NotPt : ST_Idxd_Pred<mnemonic, RC, predImmOp, 1>;
+    }
+  }
+}
+
+let addrMode = BaseImmOffset, InputType = "reg" in {
+  defm STrib_indexed: ST_Idxd < "memb", "STrib", IntRegs, s11_0Ext,
+                                u6_0Ext, 11, 6>, AddrModeRel, ImmRegRel;
+  defm STrih_indexed: ST_Idxd < "memh", "STrih", IntRegs, s11_1Ext,
+                                u6_1Ext, 12, 7>, AddrModeRel, ImmRegRel;
+  defm STriw_indexed: ST_Idxd < "memw", "STriw", IntRegs, s11_2Ext,
+                                u6_2Ext, 13, 8>, AddrModeRel, ImmRegRel;
+  let isNVStorable = 0 in
+  defm STrid_indexed: ST_Idxd < "memd", "STrid", DoubleRegs, s11_3Ext,
+                                u6_3Ext, 14, 9>, AddrModeRel;
+}
+
+let AddedComplexity = 10 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src1), (add IntRegs:$src2,
+                                                  s11_0ExtPred:$offset)),
+          (STrib_indexed IntRegs:$src2, s11_0ImmPred:$offset,
+                         (i32 IntRegs:$src1))>;
+
+def : Pat<(truncstorei16 (i32 IntRegs:$src1), (add IntRegs:$src2,
+                                                   s11_1ExtPred:$offset)),
+          (STrih_indexed IntRegs:$src2, s11_1ImmPred:$offset,
+                         (i32 IntRegs:$src1))>;
+
+def : Pat<(store (i32 IntRegs:$src1), (add IntRegs:$src2,
+                                           s11_2ExtPred:$offset)),
+          (STriw_indexed IntRegs:$src2, s11_2ImmPred:$offset,
+                         (i32 IntRegs:$src1))>;
+
+def : Pat<(store (i64 DoubleRegs:$src1), (add IntRegs:$src2,
+                                              s11_3ExtPred:$offset)),
+          (STrid_indexed IntRegs:$src2, s11_3ImmPred:$offset,
+                         (i64 DoubleRegs:$src1))>;
+}
 
 // memb(gp+#u16:0)=Rt
 let neverHasSideEffects = 1 in
@@ -1753,36 +1601,6 @@ def POST_STbri : STInstPI<(outs IntRegs:$dst), (ins IntRegs:$src1,
                             s4_0ImmPred:$offset))],
             "$src2 = $dst">;
 
-// Store byte conditionally.
-// if ([!]Pv) memb(Rs+#u6:0)=Rt
-// if (Pv) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_cPt : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
-            "if ($src1) memb($addr) = $src2",
-            []>;
-
-// if (!Pv) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_cNotPt : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
-            "if (!$src1) memb($addr) = $src2",
-            []>;
-
-// if (Pv) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_indexed_cPt : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
-            "if ($src1) memb($src2+#$src3) = $src4",
-            []>;
-
-// if (!Pv) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrib_indexed_cNotPt : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
-            "if (!$src1) memb($src2+#$src3) = $src4",
-            []>;
-
 // if ([!]Pv) memb(Rx++#s4:0)=Rt
 // if (Pv) memb(Rx++#s4:0)=Rt
 let hasCtrlDep = 1, isPredicated = 1 in
@@ -1798,23 +1616,6 @@ def POST_STbri_cNotPt : STInst2PI<(outs IntRegs:$dst),
             "if (!$src1) memb($src3++#$offset) = $src2",
             [],"$src3 = $dst">;
 
-
-// Store halfword.
-// memh(Rs+#s11:1)=Rt
-let isPredicable = 1 in
-def STrih : STInst<(outs),
-            (ins MEMri:$addr, IntRegs:$src1),
-            "memh($addr) = $src1",
-            [(truncstorei16 (i32 IntRegs:$src1), ADDRriS11_1:$addr)]>;
-
-
-let AddedComplexity = 10, isPredicable = 1 in
-def STrih_indexed : STInst<(outs),
-            (ins IntRegs:$src1, s11_1Imm:$src2,  IntRegs:$src3),
-            "memh($src1+#$src2) = $src3",
-            [(truncstorei16 (i32 IntRegs:$src3), (add (i32 IntRegs:$src1),
-                                                      s11_1ImmPred:$src2))]>;
-
 let neverHasSideEffects = 1 in
 def STrih_GP : STInst2<(outs),
             (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
@@ -1840,36 +1641,6 @@ def POST_SThri : STInstPI<(outs IntRegs:$dst),
                              s4_1ImmPred:$offset))],
             "$src2 = $dst">;
 
-// Store halfword conditionally.
-// if ([!]Pv) memh(Rs+#u6:1)=Rt
-// if (Pv) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_cPt : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
-            "if ($src1) memh($addr) = $src2",
-            []>;
-
-// if (!Pv) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_cNotPt : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
-            "if (!$src1) memh($addr) = $src2",
-            []>;
-
-// if (Pv) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_indexed_cPt : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
-            "if ($src1) memh($src2+#$src3) = $src4",
-            []>;
-
-// if (!Pv) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STrih_indexed_cNotPt : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
-            "if (!$src1) memh($src2+#$src3) = $src4",
-            []>;
-
 // if ([!]Pv) memh(Rx++#s4:1)=Rt
 // if (Pv) memh(Rx++#s4:1)=Rt
 let hasCtrlDep = 1, isPredicated = 1 in
@@ -1894,20 +1665,6 @@ def STriw_pred : STInst2<(outs),
             "Error; should not emit",
             []>;
 
-// memw(Rs+#s11:2)=Rt
-let isPredicable = 1 in
-def STriw : STInst<(outs),
-            (ins MEMri:$addr, IntRegs:$src1),
-            "memw($addr) = $src1",
-            [(store (i32 IntRegs:$src1), ADDRriS11_2:$addr)]>;
-
-let AddedComplexity = 10, isPredicable = 1 in
-def STriw_indexed : STInst<(outs),
-            (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
-            "memw($src1+#$src2) = $src3",
-            [(store (i32 IntRegs:$src3),
-                    (add (i32 IntRegs:$src1), s11_2ImmPred:$src2))]>;
-
 let neverHasSideEffects = 1 in
 def STriw_GP : STInst2<(outs),
             (ins globaladdress:$global, u16Imm:$offset, IntRegs:$src),
@@ -1931,36 +1688,6 @@ def POST_STwri : STInstPI<(outs IntRegs:$dst),
                         s4_2ImmPred:$offset))],
             "$src2 = $dst">;
 
-// Store word conditionally.
-// if ([!]Pv) memw(Rs+#u6:2)=Rt
-// if (Pv) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_cPt : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
-            "if ($src1) memw($addr) = $src2",
-            []>;
-
-// if (!Pv) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_cNotPt : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
-            "if (!$src1) memw($addr) = $src2",
-            []>;
-
-// if (Pv) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_indexed_cPt : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
-            "if ($src1) memw($src2+#$src3) = $src4",
-            []>;
-
-// if (!Pv) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1, isPredicated = 1 in
-def STriw_indexed_cNotPt : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
-            "if (!$src1) memw($src2+#$src3) = $src4",
-            []>;
-
 // if ([!]Pv) memw(Rx++#s4:2)=Rt
 // if (Pv) memw(Rx++#s4:2)=Rt
 let hasCtrlDep = 1, isPredicated = 1 in
@@ -3383,32 +3110,3 @@ include "HexagonInstrInfoV5.td"
 //===----------------------------------------------------------------------===//
 // V5 Instructions -
 //===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Generate mapping table to relate non-predicate instructions with their
-// predicated formats - true and false.
-//
-
-def getPredOpcode : InstrMapping {
-  let FilterClass = "PredRel";
-  // Instructions with the same BaseOpcode and isNVStore values form a row.
-  let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"];
-  // Instructions with the same predicate sense form a column.
-  let ColFields = ["PredSense"];
-  // The key column is the unpredicated instructions.
-  let KeyCol = [""];
-  // Value columns are PredSense=true and PredSense=false
-  let ValueCols = [["true"], ["false"]];
-}
-
-//===----------------------------------------------------------------------===//
-// Generate mapping table to relate predicated instructions with their .new
-// format.
-//
-def getPredNewOpcode : InstrMapping {
-  let FilterClass = "PredNewRel";
-  let RowFields = ["BaseOpcode", "PredSense", "isNVStore"];
-  let ColFields = ["PNewValue"];
-  let KeyCol = [""];
-  let ValueCols = [["new"]];
-}
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index b40fc418c1..ae407db029 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -276,19 +276,31 @@ def TFR_FI_immext_V4 : ALU32_ri<(outs IntRegs:$dst),
 
 // Combine
 // Rdd=combine(Rs, #s8)
-let neverHasSideEffects = 1 in
-def COMBINE_ri_V4 : ALU32_ri<(outs DoubleRegs:$dst),
-            (ins IntRegs:$src1, s8Imm:$src2),
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8,
+    neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+def COMBINE_rI_V4 : ALU32_ri<(outs DoubleRegs:$dst),
+            (ins IntRegs:$src1, s8Ext:$src2),
             "$dst = combine($src1, #$src2)",
             []>,
             Requires<[HasV4T]>;
+
 // Rdd=combine(#s8, Rs)
-let neverHasSideEffects = 1 in
-def COMBINE_ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
-            (ins s8Imm:$src1, IntRegs:$src2),
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8,
+    neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
+            (ins s8Ext:$src1, IntRegs:$src2),
             "$dst = combine(#$src1, $src2)",
             []>,
             Requires<[HasV4T]>;
+
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6,
+    neverHasSideEffects = 1, validSubTargets = HasV4SubT in
+def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
+            (ins s8Imm:$src1, u6Ext:$src2),
+            "$dst = combine(#$src1, #$src2)",
+            []>,
+            Requires<[HasV4T]>;
+
 //===----------------------------------------------------------------------===//
 // ALU32/PERM +
 //===----------------------------------------------------------------------===//
@@ -415,15 +427,103 @@ def LDrid_indexed_V4 : LDInst<(outs DoubleRegs:$dst),
                                           (i32 IntRegs:$src2)))))]>,
                     Requires<[HasV4T]>;
 
-let AddedComplexity = 40, isPredicable = 1 in
-def LDrid_indexed_shl_V4 : LDInst<(outs DoubleRegs:$dst),
-                    (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
-                    "$dst=memd($src1+$src2<<#$offset)",
-                    [(set (i64 DoubleRegs:$dst),
-                          (i64 (load (add (i32 IntRegs:$src1),
-                                          (shl (i32 IntRegs:$src2),
-                                               u2ImmPred:$offset)))))]>,
-                    Requires<[HasV4T]>;
+// multiclass for load instructions with base + register offset
+// addressing mode
+multiclass ld_idxd_shl_pbase<string mnemonic, RegisterClass RC, bit isNot,
+                             bit isPredNew> {
+  let PNewValue = #!if(isPredNew, "new", "") in
+  def #NAME# : LDInst2<(outs RC:$dst),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$offset),
+            !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+            ") ")#"$dst = "#mnemonic#"($src2+$src3<<#$offset)",
+            []>, Requires<[HasV4T]>;
+}
+
+multiclass ld_idxd_shl_pred<string mnemonic, RegisterClass RC, bit PredNot> {
+  let PredSense = #!if(PredNot, "false", "true") in {
+    defm _c#NAME# : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 0>;
+    // Predicate new
+    defm _cdn#NAME# : ld_idxd_shl_pbase<mnemonic, RC, PredNot, 1>;
+  }
+}
+
+let neverHasSideEffects  = 1 in
+multiclass ld_idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
+  let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+    let isPredicable = 1 in
+    def #NAME#_V4 : LDInst2<(outs RC:$dst),
+            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
+            "$dst = "#mnemonic#"($src1+$src2<<#$offset)",
+            []>, Requires<[HasV4T]>;
+
+    let isPredicated = 1 in {
+      defm Pt_V4 : ld_idxd_shl_pred<mnemonic, RC, 0 >;
+      defm NotPt_V4 : ld_idxd_shl_pred<mnemonic, RC, 1>;
+    }
+  }
+}
+
+let addrMode = BaseRegOffset in {
+  defm LDrib_indexed_shl: ld_idxd_shl<"memb", "LDrib", IntRegs>, AddrModeRel;
+  defm LDriub_indexed_shl: ld_idxd_shl<"memub", "LDriub", IntRegs>, AddrModeRel;
+  defm LDrih_indexed_shl: ld_idxd_shl<"memh", "LDrih", IntRegs>, AddrModeRel;
+  defm LDriuh_indexed_shl: ld_idxd_shl<"memuh", "LDriuh", IntRegs>, AddrModeRel;
+  defm LDriw_indexed_shl: ld_idxd_shl<"memw", "LDriw", IntRegs>, AddrModeRel;
+  defm LDrid_indexed_shl: ld_idxd_shl<"memd", "LDrid", DoubleRegs>, AddrModeRel;
+}
+
+// 'def pats' for load instructions with base + register offset and non-zero
+// immediate value. Immediate value is used to left-shift the second
+// register operand.
+let AddedComplexity = 40 in {
+def : Pat <(i32 (sextloadi8 (add IntRegs:$src1,
+                                 (shl IntRegs:$src2, u2ImmPred:$offset)))),
+           (LDrib_indexed_shl_V4 IntRegs:$src1,
+            IntRegs:$src2, u2ImmPred:$offset)>,
+            Requires<[HasV4T]>;
+
+def : Pat <(i32 (zextloadi8 (add IntRegs:$src1,
+                                 (shl IntRegs:$src2, u2ImmPred:$offset)))),
+           (LDriub_indexed_shl_V4 IntRegs:$src1,
+            IntRegs:$src2, u2ImmPred:$offset)>,
+            Requires<[HasV4T]>;
+
+def : Pat <(i32 (extloadi8 (add IntRegs:$src1,
+                                (shl IntRegs:$src2, u2ImmPred:$offset)))),
+           (LDriub_indexed_shl_V4 IntRegs:$src1,
+            IntRegs:$src2, u2ImmPred:$offset)>,
+            Requires<[HasV4T]>;
+
+def : Pat <(i32 (sextloadi16 (add IntRegs:$src1,
+                                  (shl IntRegs:$src2, u2ImmPred:$offset)))),
+           (LDrih_indexed_shl_V4 IntRegs:$src1,
+            IntRegs:$src2, u2ImmPred:$offset)>,
+            Requires<[HasV4T]>;
+
+def : Pat <(i32 (zextloadi16 (add IntRegs:$src1,
+                                  (shl IntRegs:$src2, u2ImmPred:$offset)))),
+           (LDriuh_indexed_shl_V4 IntRegs:$src1,
+            IntRegs:$src2, u2ImmPred:$offset)>,
+            Requires<[HasV4T]>;
+
+def : Pat <(i32 (extloadi16 (add IntRegs:$src1,
+                                 (shl IntRegs:$src2, u2ImmPred:$offset)))),
+           (LDriuh_indexed_shl_V4 IntRegs:$src1,
+            IntRegs:$src2, u2ImmPred:$offset)>,
+            Requires<[HasV4T]>;
+
+def : Pat <(i32 (load (add IntRegs:$src1,
+                           (shl IntRegs:$src2, u2ImmPred:$offset)))),
+           (LDriw_indexed_shl_V4 IntRegs:$src1,
+            IntRegs:$src2, u2ImmPred:$offset)>,
+            Requires<[HasV4T]>;
+
+def : Pat <(i64 (load (add IntRegs:$src1,
+                           (shl IntRegs:$src2, u2ImmPred:$offset)))),
+           (LDrid_indexed_shl_V4 IntRegs:$src1,
+            IntRegs:$src2, u2ImmPred:$offset)>,
+            Requires<[HasV4T]>;
+}
 
 //// Load doubleword conditionally.
 // if ([!]Pv[.new]) Rd=memd(Rs+Rt<<#u2)
@@ -459,42 +559,6 @@ def LDrid_indexed_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
                     []>,
                     Requires<[HasV4T]>;
 
-// if (Pv) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cPt_V4 : LDInst2<(outs DoubleRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if ($src1) $dst=memd($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cdnPt_V4 : LDInst2<(outs DoubleRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if ($src1.new) $dst=memd($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if (!$src1) $dst=memd($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memd(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrid_indexed_shl_cdnNotPt_V4 : LDInst2<(outs DoubleRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if (!$src1.new) $dst=memd($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
 // Rdd=memd(Rt<<#u2+#U6)
 
 //// Load byte.
@@ -527,26 +591,6 @@ def LDriub_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
                     Requires<[HasV4T]>;
 
 let AddedComplexity = 40, isPredicable = 1 in
-def LDrib_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
-                    (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
-                    "$dst=memb($src1+$src2<<#$offset)",
-                    [(set (i32 IntRegs:$dst),
-                          (i32 (sextloadi8 (add (i32 IntRegs:$src1),
-                                                (shl (i32 IntRegs:$src2),
-                                                     u2ImmPred:$offset)))))]>,
-                    Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriub_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
-                    (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
-                    "$dst=memub($src1+$src2<<#$offset)",
-                    [(set (i32 IntRegs:$dst),
-                          (i32 (zextloadi8 (add (i32 IntRegs:$src1),
-                                                (shl (i32 IntRegs:$src2),
-                                                     u2ImmPred:$offset)))))]>,
-                    Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
 def LDriub_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
                     (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
                     "$dst=memub($src1+$src2<<#$offset)",
@@ -590,42 +634,6 @@ def LDrib_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
                     []>,
                     Requires<[HasV4T]>;
 
-// if (Pv) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if ($src1) $dst=memb($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if ($src1.new) $dst=memb($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if (!$src1) $dst=memb($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memb(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrib_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if (!$src1.new) $dst=memb($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
 //// Load unsigned byte conditionally.
 // if ([!]Pv[.new]) Rd=memub(Rs+Rt<<#u2)
 // if (Pv) Rd=memub(Rs+Rt<<#u2)
@@ -660,42 +668,6 @@ def LDriub_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
                     []>,
                     Requires<[HasV4T]>;
 
-// if (Pv) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if ($src1) $dst=memub($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if ($src1.new) $dst=memub($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if (!$src1) $dst=memub($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memub(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriub_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if (!$src1.new) $dst=memub($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
 // Rd=memb(Rt<<#u2+#U6)
 
 //// Load halfword
@@ -727,27 +699,6 @@ def LDriuh_ae_indexed_V4 : LDInst<(outs IntRegs:$dst),
                                                 (i32 IntRegs:$src2)))))]>,
                     Requires<[HasV4T]>;
 
-// Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 40, isPredicable = 1 in
-def LDrih_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
-                    (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
-                    "$dst=memh($src1+$src2<<#$offset)",
-                    [(set (i32 IntRegs:$dst),
-                          (i32 (sextloadi16 (add (i32 IntRegs:$src1),
-                                                 (shl (i32 IntRegs:$src2),
-                                                      u2ImmPred:$offset)))))]>,
-                    Requires<[HasV4T]>;
-
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriuh_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
-                    (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
-                    "$dst=memuh($src1+$src2<<#$offset)",
-                    [(set (i32 IntRegs:$dst),
-                          (i32 (zextloadi16 (add (i32 IntRegs:$src1),
-                                                 (shl (i32 IntRegs:$src2),
-                                                      u2ImmPred:$offset)))))]>,
-                    Requires<[HasV4T]>;
-
 let AddedComplexity = 40, isPredicable = 1 in
 def LDriuh_ae_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
                     (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
@@ -792,42 +743,6 @@ def LDrih_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
                     []>,
                     Requires<[HasV4T]>;
 
-// if (Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if ($src1) $dst=memh($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if ($src1.new) $dst=memh($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if (!$src1) $dst=memh($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDrih_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if (!$src1.new) $dst=memh($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
 //// Load unsigned halfword conditionally.
 // if ([!]Pv[.new]) Rd=memuh(Rs+Rt<<#u2)
 // if (Pv) Rd=memuh(Rs+Rt<<#u2)
@@ -862,42 +777,6 @@ def LDriuh_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
                     []>,
                     Requires<[HasV4T]>;
 
-// if (Pv) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if ($src1) $dst=memuh($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if ($src1.new) $dst=memuh($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if (!$src1) $dst=memuh($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memuh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriuh_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if (!$src1.new) $dst=memuh($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
 // Rd=memh(Rt<<#u2+#U6)
 
 //// Load word.
@@ -921,17 +800,6 @@ def LDriw_indexed_V4 : LDInst<(outs IntRegs:$dst),
                                           (i32 IntRegs:$src2)))))]>,
                     Requires<[HasV4T]>;
 
-// Rd=memw(Rs+Rt<<#u2)
-let AddedComplexity = 40, isPredicable = 1 in
-def LDriw_indexed_shl_V4 : LDInst<(outs IntRegs:$dst),
-                    (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$offset),
-                    "$dst=memw($src1+$src2<<#$offset)",
-                    [(set (i32 IntRegs:$dst),
-                          (i32 (load (add (i32 IntRegs:$src1),
-                                          (shl (i32 IntRegs:$src2),
-                                               u2ImmPred:$offset)))))]>,
-                    Requires<[HasV4T]>;
-
 //// Load word conditionally.
 // if ([!]Pv[.new]) Rd=memw(Rs+Rt<<#u2)
 // if (Pv) Rd=memw(Rs+Rt<<#u2)
@@ -966,42 +834,6 @@ def LDriw_indexed_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
                     []>,
                     Requires<[HasV4T]>;
 
-// if (Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if ($src1) $dst=memw($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cdnPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if ($src1.new) $dst=memw($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (!Pv) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if (!$src1) $dst=memw($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
-// if (!Pv.new) Rd=memh(Rs+Rt<<#u2)
-let AddedComplexity = 45, isPredicated = 1 in
-def LDriw_indexed_shl_cdnNotPt_V4 : LDInst2<(outs IntRegs:$dst),
-                    (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3,
-                         u2Imm:$offset),
-                    "if (!$src1.new) $dst=memw($src2+$src3<<#$offset)",
-                    []>,
-                    Requires<[HasV4T]>;
-
 /// Load from global offset
 
 let isPredicable = 1, neverHasSideEffects = 1 in
@@ -1701,16 +1533,124 @@ def STriw_abs_set_V4 : STInst2<(outs IntRegs:$dst1),
             []>,
             Requires<[HasV4T]>;
 
-// memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, isPredicable = 1 in
-def STrid_indexed_shl_V4 : STInst<(outs),
-            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, DoubleRegs:$src4),
-            "memd($src1+$src2<<#$src3) = $src4",
-            [(store (i64 DoubleRegs:$src4),
-                    (add (i32 IntRegs:$src1),
-                         (shl (i32 IntRegs:$src2), u2ImmPred:$src3)))]>,
+
+// multiclass for store instructions with base + register offset addressing
+// mode
+multiclass ST_Idxd_shl_Pbase<string mnemonic, RegisterClass RC, bit isNot,
+                             bit isPredNew> {
+  let PNewValue = #!if(isPredNew, "new", "") in
+  def #NAME# : STInst2<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 RC:$src5),
+            !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+            ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5",
+            []>,
+            Requires<[HasV4T]>;
+}
+
+multiclass ST_Idxd_shl_Pred<string mnemonic, RegisterClass RC, bit PredNot> {
+  let PredSense = #!if(PredNot, "false", "true") in {
+    defm _c#NAME# : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 0>;
+    // Predicate new
+    defm _cdn#NAME# : ST_Idxd_shl_Pbase<mnemonic, RC, PredNot, 1>;
+  }
+}
+
+let isNVStorable = 1 in
+multiclass ST_Idxd_shl<string mnemonic, string CextOp, RegisterClass RC> {
+  let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+    let isPredicable = 1 in
+    def #NAME#_V4 : STInst2<(outs),
+            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4),
+            #mnemonic#"($src1+$src2<<#$src3) = $src4",
+            []>,
             Requires<[HasV4T]>;
 
+    let isPredicated = 1 in {
+      defm Pt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 0 >;
+      defm NotPt_V4 : ST_Idxd_shl_Pred<mnemonic, RC, 1>;
+    }
+  }
+}
+
+// multiclass for new-value store instructions with base + register offset
+// addressing mode.
+multiclass ST_Idxd_shl_Pbase_nv<string mnemonic, RegisterClass RC, bit isNot,
+                             bit isPredNew> {
+  let PNewValue = #!if(isPredNew, "new", "") in
+  def #NAME#_nv_V4 : NVInst_V4<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
+                 RC:$src5),
+            !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+            ") ")#mnemonic#"($src2+$src3<<#$src4) = $src5.new",
+            []>,
+            Requires<[HasV4T]>;
+}
+
+multiclass ST_Idxd_shl_Pred_nv<string mnemonic, RegisterClass RC, bit PredNot> {
+  let PredSense = #!if(PredNot, "false", "true") in {
+    defm _c#NAME# : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 0>;
+    // Predicate new
+    defm _cdn#NAME# : ST_Idxd_shl_Pbase_nv<mnemonic, RC, PredNot, 1>;
+  }
+}
+
+let mayStore = 1, isNVStore = 1 in
+multiclass ST_Idxd_shl_nv<string mnemonic, string CextOp, RegisterClass RC> {
+  let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed_shl in {
+    let isPredicable = 1 in
+    def #NAME#_nv_V4 : NVInst_V4<(outs),
+            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, RC:$src4),
+            #mnemonic#"($src1+$src2<<#$src3) = $src4.new",
+            []>,
+            Requires<[HasV4T]>;
+
+    let isPredicated = 1 in {
+      defm Pt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 0 >;
+      defm NotPt : ST_Idxd_shl_Pred_nv<mnemonic, RC, 1>;
+    }
+  }
+}
+
+let addrMode = BaseRegOffset, neverHasSideEffects = 1,
+validSubTargets = HasV4SubT in {
+  defm STrib_indexed_shl: ST_Idxd_shl<"memb", "STrib", IntRegs>,
+                          ST_Idxd_shl_nv<"memb", "STrib", IntRegs>, AddrModeRel;
+
+  defm STrih_indexed_shl: ST_Idxd_shl<"memh", "STrih", IntRegs>,
+                          ST_Idxd_shl_nv<"memh", "STrih", IntRegs>, AddrModeRel;
+
+  defm STriw_indexed_shl: ST_Idxd_shl<"memw", "STriw", IntRegs>,
+                          ST_Idxd_shl_nv<"memw", "STriw", IntRegs>, AddrModeRel;
+
+  let isNVStorable = 0 in
+  defm STrid_indexed_shl: ST_Idxd_shl<"memd", "STrid", DoubleRegs>, AddrModeRel;
+}
+
+let Predicates = [HasV4T], AddedComplexity = 10 in {
+def : Pat<(truncstorei8 (i32 IntRegs:$src4),
+                       (add IntRegs:$src1, (shl IntRegs:$src2,
+                                                u2ImmPred:$src3))),
+          (STrib_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+                                u2ImmPred:$src3, IntRegs:$src4)>;
+
+def : Pat<(truncstorei16 (i32 IntRegs:$src4),
+                        (add IntRegs:$src1, (shl IntRegs:$src2,
+                                                 u2ImmPred:$src3))),
+          (STrih_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+                                u2ImmPred:$src3, IntRegs:$src4)>;
+
+def : Pat<(store (i32 IntRegs:$src4),
+                 (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
+          (STriw_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+                                u2ImmPred:$src3, IntRegs:$src4)>;
+
+def : Pat<(store (i64 DoubleRegs:$src4),
+                (add IntRegs:$src1, (shl IntRegs:$src2, u2ImmPred:$src3))),
+          (STrid_indexed_shl_V4 IntRegs:$src1, IntRegs:$src2,
+                                u2ImmPred:$src3, DoubleRegs:$src4)>;
+}
+
 // memd(Ru<<#u2+#U6)=Rtt
 let AddedComplexity = 10 in
 def STrid_shl_V4 : STInst<(outs),
@@ -1732,88 +1672,6 @@ def STrid_shl_V4 : STInst<(outs),
 // if ([!]Pv[.new]) memd(#u6)=Rtt
 // TODO: needs to be implemented.
 
-// if ([!]Pv[.new]) memd(Rs+#u6:3)=Rtt
-// if (Pv) memd(Rs+#u6:3)=Rtt
-// if (Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrid_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
-            "if ($src1.new) memd($addr) = $src2",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memd(Rs+#u6:3)=Rtt
-// if (!Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrid_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, DoubleRegs:$src2),
-            "if (!$src1.new) memd($addr) = $src2",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) memd(Rs+#u6:3)=Rtt
-// if (Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrid_indexed_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
-                 DoubleRegs:$src4),
-            "if ($src1.new) memd($src2+#$src3) = $src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memd(Rs+#u6:3)=Rtt
-// if (!Pv.new) memd(Rs+#u6:3)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrid_indexed_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_3Imm:$src3,
-                 DoubleRegs:$src4),
-            "if (!$src1.new) memd($src2+#$src3) = $src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memd(Rs+Ru<<#u2)=Rtt
-// if (Pv) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrid_indexed_shl_cPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 DoubleRegs:$src5),
-            "if ($src1) memd($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrid_indexed_shl_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 DoubleRegs:$src5),
-            "if ($src1.new) memd($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-// if (!Pv) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrid_indexed_shl_cNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 DoubleRegs:$src5),
-            "if (!$src1) memd($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-// if (!Pv.new) memd(Rs+Ru<<#u2)=Rtt
-let AddedComplexity = 10, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrid_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 DoubleRegs:$src5),
-            "if (!$src1.new) memd($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
 // if ([!]Pv[.new]) memd(Rx++#s4:3)=Rtt
 // if (Pv) memd(Rx++#s4:3)=Rtt
 // if (Pv.new) memd(Rx++#s4:3)=Rtt
@@ -1840,27 +1698,68 @@ def POST_STdri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
             Requires<[HasV4T]>;
 
 
-// Store byte.
-// memb(Rs+#u6:0)=#S8
-let AddedComplexity = 10, isPredicable = 1 in
-def STrib_imm_V4 : STInst<(outs),
-            (ins IntRegs:$src1, u6_0Imm:$src2, s8Imm:$src3),
-            "memb($src1+#$src2) = #$src3",
-            [(truncstorei8 s8ImmPred:$src3, (add (i32 IntRegs:$src1),
-                                                 u6_0ImmPred:$src2))]>,
+// multiclass for store instructions with base + immediate offset
+// addressing mode and immediate stored value.
+multiclass ST_Imm_Pbase<string mnemonic, Operand OffsetOp, bit isNot,
+                        bit isPredNew> {
+  let PNewValue = #!if(isPredNew, "new", "") in
+  def #NAME# : STInst2<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, OffsetOp:$src3, s6Ext:$src4),
+            #!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+            ") ")#mnemonic#"($src2+#$src3) = #$src4",
+            []>,
             Requires<[HasV4T]>;
+}
 
-// memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10, isPredicable = 1 in
-def STrib_indexed_shl_V4 : STInst<(outs),
-            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
-            "memb($src1+$src2<<#$src3) = $src4",
-            [(truncstorei8 (i32 IntRegs:$src4),
-                           (add (i32 IntRegs:$src1),
-                                (shl (i32 IntRegs:$src2),
-                                          u2ImmPred:$src3)))]>,
+multiclass ST_Imm_Pred<string mnemonic, Operand OffsetOp, bit PredNot> {
+  let PredSense = #!if(PredNot, "false", "true") in {
+    defm _c#NAME# : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 0>;
+    // Predicate new
+    defm _cdn#NAME# : ST_Imm_Pbase<mnemonic, OffsetOp, PredNot, 1>;
+  }
+}
+
+let isExtendable = 1, isExtentSigned = 1, neverHasSideEffects = 1 in
+multiclass ST_Imm<string mnemonic, string CextOp, Operand OffsetOp> {
+  let CextOpcode = CextOp, BaseOpcode = CextOp#_imm in {
+    let opExtendable = 2, opExtentBits = 8, isPredicable = 1 in
+    def #NAME#_V4 : STInst2<(outs),
+            (ins IntRegs:$src1, OffsetOp:$src2, s8Ext:$src3),
+            #mnemonic#"($src1+#$src2) = #$src3",
+            []>,
             Requires<[HasV4T]>;
 
+    let opExtendable = 3, opExtentBits = 6, isPredicated = 1 in {
+      defm Pt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 0>;
+      defm NotPt_V4 : ST_Imm_Pred<mnemonic, OffsetOp, 1 >;
+    }
+  }
+}
+
+let addrMode = BaseImmOffset, InputType = "imm",
+    validSubTargets = HasV4SubT in {
+  defm STrib_imm : ST_Imm<"memb", "STrib", u6_0Imm>, ImmRegRel;
+  defm STrih_imm : ST_Imm<"memh", "STrih", u6_1Imm>, ImmRegRel;
+  defm STriw_imm : ST_Imm<"memw", "STriw", u6_2Imm>, ImmRegRel;
+}
+
+let Predicates = [HasV4T], AddedComplexity = 10 in {
+def: Pat<(truncstorei8 s8ExtPred:$src3, (add IntRegs:$src1, u6_0ImmPred:$src2)),
+            (STrib_imm_V4 IntRegs:$src1, u6_0ImmPred:$src2, s8ExtPred:$src3)>;
+
+def: Pat<(truncstorei16 s8ExtPred:$src3, (add IntRegs:$src1,
+                                              u6_1ImmPred:$src2)),
+            (STrih_imm_V4 IntRegs:$src1, u6_1ImmPred:$src2, s8ExtPred:$src3)>;
+
+def: Pat<(store s8ExtPred:$src3, (add IntRegs:$src1, u6_2ImmPred:$src2)),
+            (STriw_imm_V4 IntRegs:$src1, u6_2ImmPred:$src2, s8ExtPred:$src3)>;
+}
+
+let AddedComplexity = 6 in
+def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)),
+           (STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+           Requires<[HasV4T]>;
+
 // memb(Ru<<#u2+#U6)=Rt
 let AddedComplexity = 10 in
 def STrib_shl_V4 : STInst<(outs),
@@ -1880,125 +1779,6 @@ def STrib_shl_V4 : STInst<(outs),
 
 // Store byte conditionally.
 // if ([!]Pv[.new]) memb(#u6)=Rt
-// if ([!]Pv[.new]) memb(Rs+#u6:0)=#S6
-// if (Pv) memb(Rs+#u6:0)=#S6
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrib_imm_cPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
-            "if ($src1) memb($src2+#$src3) = #$src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memb(Rs+#u6:0)=#S6
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrib_imm_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
-            "if ($src1.new) memb($src2+#$src3) = #$src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rs+#u6:0)=#S6
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrib_imm_cNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
-            "if (!$src1) memb($src2+#$src3) = #$src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rs+#u6:0)=#S6
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrib_imm_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, s6Imm:$src4),
-            "if (!$src1.new) memb($src2+#$src3) = #$src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memb(Rs+#u6:0)=Rt
-// if (Pv) memb(Rs+#u6:0)=Rt
-// if (Pv.new) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrib_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
-            "if ($src1.new) memb($addr) = $src2",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rs+#u6:0)=Rt
-// if (!Pv.new) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrib_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
-            "if (!$src1.new) memb($addr) = $src2",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) memb(Rs+#u6:0)=Rt
-// if (!Pv) memb(Rs+#u6:0)=Rt
-// if (Pv.new) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrib_indexed_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
-            "if ($src1.new) memb($src2+#$src3) = $src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rs+#u6:0)=Rt
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrib_indexed_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
-            "if (!$src1.new) memb($src2+#$src3) = $src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Rt
-// if (Pv) memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
-    isPredicated = 1 in
-def STrib_indexed_shl_cPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if ($src1) memb($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
-    isPredicated = 1 in
-def STrib_indexed_shl_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if ($src1.new) memb($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
-    isPredicated = 1 in
-def STrib_indexed_shl_cNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if (!$src1) memb($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
-    isPredicated = 1 in
-def STrib_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
 // if ([!]Pv[.new]) memb(Rx++#s4:0)=Rt
 // if (Pv) memb(Rx++#s4:0)=Rt
 // if (Pv.new) memb(Rx++#s4:0)=Rt
@@ -2025,29 +1805,14 @@ def POST_STbri_cdnNotPt_V4 : STInst2PI<(outs IntRegs:$dst),
 // TODO: needs to be implemented
 // memh(Re=#U6)=Rt.H
 // memh(Rs+#s11:1)=Rt.H
-// memh(Rs+#u6:1)=#S8
-let AddedComplexity = 10, isPredicable = 1 in
-def STrih_imm_V4 : STInst<(outs),
-            (ins IntRegs:$src1, u6_1Imm:$src2, s8Imm:$src3),
-            "memh($src1+#$src2) = #$src3",
-            [(truncstorei16 s8ImmPred:$src3, (add (i32 IntRegs:$src1),
-                                                  u6_1ImmPred:$src2))]>,
-            Requires<[HasV4T]>;
+let AddedComplexity = 6 in
+def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)),
+           (STrih_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+           Requires<[HasV4T]>;
 
 // memh(Rs+Ru<<#u2)=Rt.H
 // TODO: needs to be implemented.
 
-// memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10, isPredicable = 1 in
-def STrih_indexed_shl_V4 : STInst<(outs),
-            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
-            "memh($src1+$src2<<#$src3) = $src4",
-            [(truncstorei16 (i32 IntRegs:$src4),
-                            (add (i32 IntRegs:$src1),
-                                 (shl (i32 IntRegs:$src2),
-                                      u2ImmPred:$src3)))]>,
-            Requires<[HasV4T]>;
-
 // memh(Ru<<#u2+#U6)=Rt.H
 // memh(Ru<<#u2+#U6)=Rt
 let AddedComplexity = 10 in
@@ -2071,127 +1836,10 @@ def STrih_shl_V4 : STInst<(outs),
 // if ([!]Pv[.new]) memh(#u6)=Rt.H
 // if ([!]Pv[.new]) memh(#u6)=Rt
 
-// if ([!]Pv[.new]) memh(Rs+#u6:1)=#S6
-// if (Pv) memh(Rs+#u6:1)=#S6
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrih_imm_cPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
-            "if ($src1) memh($src2+#$src3) = #$src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+#u6:1)=#S6
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrih_imm_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
-            "if ($src1.new) memh($src2+#$src3) = #$src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+#u6:1)=#S6
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrih_imm_cNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
-            "if (!$src1) memh($src2+#$src3) = #$src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+#u6:1)=#S6
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrih_imm_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, s6Imm:$src4),
-            "if (!$src1.new) memh($src2+#$src3) = #$src4",
-            []>,
-            Requires<[HasV4T]>;
 
 // if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
 // TODO: needs to be implemented.
 
-// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt
-// if (Pv) memh(Rs+#u6:1)=Rt
-// if (Pv.new) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrih_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
-            "if ($src1.new) memh($addr) = $src2",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+#u6:1)=Rt
-// if (!Pv.new) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrih_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
-            "if (!$src1.new) memh($addr) = $src2",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrih_indexed_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
-            "if ($src1.new) memh($src2+#$src3) = $src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+#u6:1)=Rt
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrih_indexed_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
-            "if (!$src1.new) memh($src2+#$src3) = $src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt.H
-// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Rt
-// if (Pv) memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
-    isPredicated = 1 in
-def STrih_indexed_shl_cPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if ($src1) memh($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
-    isPredicated = 1 in
-def STrih_indexed_shl_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if ($src1.new) memh($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
-    isPredicated = 1 in
-def STrih_indexed_shl_cNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if (!$src1) memh($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
-    isPredicated = 1 in
-def STrih_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
 // if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
 // TODO: Needs to be implemented.
 
@@ -2229,25 +1877,10 @@ def STriw_pred_V4 : STInst2<(outs),
             []>,
             Requires<[HasV4T]>;
 
-
-// memw(Rs+#u6:2)=#S8
-let AddedComplexity = 10, isPredicable = 1 in
-def STriw_imm_V4 : STInst<(outs),
-            (ins IntRegs:$src1, u6_2Imm:$src2, s8Imm:$src3),
-            "memw($src1+#$src2) = #$src3",
-            [(store s8ImmPred:$src3, (add (i32 IntRegs:$src1),
-                                          u6_2ImmPred:$src2))]>,
-            Requires<[HasV4T]>;
-
-// memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10, isPredicable = 1 in
-def STriw_indexed_shl_V4 : STInst<(outs),
-            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
-            "memw($src1+$src2<<#$src3) = $src4",
-            [(store (i32 IntRegs:$src4), (add (i32 IntRegs:$src1),
-                                    (shl (i32 IntRegs:$src2),
-                                         u2ImmPred:$src3)))]>,
-            Requires<[HasV4T]>;
+let AddedComplexity = 6 in
+def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)),
+           (STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>,
+           Requires<[HasV4T]>;
 
 // memw(Ru<<#u2+#U6)=Rt
 let AddedComplexity = 10 in
@@ -2267,127 +1900,6 @@ def STriw_shl_V4 : STInst<(outs),
 // memw(gp+#u16:2)=Rt
 
 
-// Store word conditionally.
-
-// if ([!]Pv[.new]) memw(Rs+#u6:2)=#S6
-// if (Pv) memw(Rs+#u6:2)=#S6
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STriw_imm_cPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
-            "if ($src1) memw($src2+#$src3) = #$src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+#u6:2)=#S6
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STriw_imm_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
-            "if ($src1.new) memw($src2+#$src3) = #$src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+#u6:2)=#S6
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STriw_imm_cNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
-            "if (!$src1) memw($src2+#$src3) = #$src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+#u6:2)=#S6
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STriw_imm_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, s6Imm:$src4),
-            "if (!$src1.new) memw($src2+#$src3) = #$src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memw(Rs+#u6:2)=Rt
-// if (Pv) memw(Rs+#u6:2)=Rt
-// if (Pv.new) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STriw_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
-            "if ($src1.new) memw($addr) = $src2",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+#u6:2)=Rt
-// if (!Pv.new) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STriw_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, MEMri:$addr, IntRegs:$src2),
-            "if (!$src1.new) memw($addr) = $src2",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv) memw(Rs+#u6:2)=Rt
-// if (!Pv) memw(Rs+#u6:2)=Rt
-// if (Pv.new) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STriw_indexed_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
-            "if ($src1.new) memw($src2+#$src3) = $src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+#u6:2)=Rt
-let neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STriw_indexed_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
-            "if (!$src1.new) memw($src2+#$src3) = $src4",
-            []>,
-            Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Rt
-// if (Pv) memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
-    isPredicated = 1 in
-def STriw_indexed_shl_cPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if ($src1) memw($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
-    isPredicated = 1 in
-def STriw_indexed_shl_cdnPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if ($src1.new) memw($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
-    isPredicated = 1 in
-def STriw_indexed_shl_cNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if (!$src1) memw($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+Ru<<#u2)=Rt
-let AddedComplexity = 10,
-    isPredicated = 1 in
-def STriw_indexed_shl_cdnNotPt_V4 : STInst2<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5",
-            []>,
-            Requires<[HasV4T]>;
-
 // if ([!]Pv[.new]) memw(Rx++#s4:2)=Rt
 // if (Pv) memw(Rx++#s4:2)=Rt
 // if (Pv.new) memw(Rx++#s4:2)=Rt
@@ -2860,6 +2372,59 @@ def : Pat<(store (i32 IntRegs:$src1),
 // NV/ST +
 //===----------------------------------------------------------------------===//
 
+// multiclass for new-value store instructions with base + immediate offset.
+//
+multiclass ST_Idxd_Pbase_nv<string mnemonic, RegisterClass RC,
+                            Operand predImmOp, bit isNot, bit isPredNew> {
+  let PNewValue = #!if(isPredNew, "new", "") in
+  def #NAME#_nv_V4 : NVInst_V4<(outs),
+            (ins PredRegs:$src1, IntRegs:$src2, predImmOp:$src3, RC: $src4),
+            !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
+            ") ")#mnemonic#"($src2+#$src3) = $src4.new",
+            []>,
+            Requires<[HasV4T]>;
+}
+
+multiclass ST_Idxd_Pred_nv<string mnemonic, RegisterClass RC, Operand predImmOp,
+                           bit PredNot> {
+  let PredSense = #!if(PredNot, "false", "true") in {
+    defm _c#NAME# : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 0>;
+    // Predicate new
+    defm _cdn#NAME# : ST_Idxd_Pbase_nv<mnemonic, RC, predImmOp, PredNot, 1>;
+  }
+}
+
+let mayStore = 1, isNVStore = 1, neverHasSideEffects = 1, isExtendable = 1 in
+multiclass ST_Idxd_nv<string mnemonic, string CextOp, RegisterClass RC,
+                   Operand ImmOp, Operand predImmOp, bits<5> ImmBits,
+                   bits<5> PredImmBits> {
+
+  let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in {
+    let opExtendable = 1, isExtentSigned = 1, opExtentBits = ImmBits,
+    isPredicable = 1 in
+    def #NAME#_nv_V4 : NVInst_V4<(outs),
+            (ins IntRegs:$src1, ImmOp:$src2, RC:$src3),
+            #mnemonic#"($src1+#$src2) = $src3.new",
+            []>,
+            Requires<[HasV4T]>;
+
+    let opExtendable = 2, isExtentSigned = 0, opExtentBits = PredImmBits,
+    isPredicated = 1 in {
+      defm Pt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 0>;
+      defm NotPt : ST_Idxd_Pred_nv<mnemonic, RC, predImmOp, 1>;
+    }
+  }
+}
+
+let addrMode = BaseImmOffset, validSubTargets = HasV4SubT in {
+  defm STrib_indexed: ST_Idxd_nv<"memb", "STrib", IntRegs, s11_0Ext,
+                                 u6_0Ext, 11, 6>, AddrModeRel;
+  defm STrih_indexed: ST_Idxd_nv<"memh", "STrih", IntRegs, s11_1Ext,
+                                 u6_1Ext, 12, 7>, AddrModeRel;
+  defm STriw_indexed: ST_Idxd_nv<"memw", "STriw", IntRegs, s11_2Ext,
+                                 u6_2Ext, 13, 8>, AddrModeRel;
+}
+
 // Store new-value byte.
 
 // memb(Re=#U6)=Nt.new
@@ -2870,21 +2435,6 @@ def STrib_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
             []>,
             Requires<[HasV4T]>;
 
-let mayStore = 1, isPredicable = 1 in
-def STrib_indexed_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, s11_0Imm:$src2, IntRegs:$src3),
-            "memb($src1+#$src2) = $src3.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
-def STrib_indexed_shl_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
-            "memb($src1+$src2<<#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
 // memb(Ru<<#u2+#U6)=Nt.new
 let mayStore = 1, AddedComplexity = 10 in
 def STrib_shl_nv_V4 : NVInst_V4<(outs),
@@ -2961,84 +2511,6 @@ def STrib_cdnNotPt_nv_V4 : NVInst_V4<(outs),
             []>,
             Requires<[HasV4T]>;
 
-// if (Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrib_indexed_cPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
-            "if ($src1) memb($src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrib_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
-            "if ($src1.new) memb($src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrib_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
-            "if (!$src1) memb($src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rs+#u6:0)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrib_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_0Imm:$src3, IntRegs:$src4),
-            "if (!$src1.new) memb($src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
-
-// if ([!]Pv[.new]) memb(Rs+Ru<<#u2)=Nt.new
-// if (Pv) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
-    isPredicated = 1 in
-def STrib_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if ($src1) memb($src2+$src3<<#$src4) = $src5.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
-    isPredicated = 1 in
-def STrib_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if ($src1.new) memb($src2+$src3<<#$src4) = $src5.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
-    isPredicated = 1 in
-def STrib_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if (!$src1) memb($src2+$src3<<#$src4) = $src5.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memb(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
-    isPredicated = 1 in
-def STrib_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if (!$src1.new) memb($src2+$src3<<#$src4) = $src5.new",
-            []>,
-            Requires<[HasV4T]>;
-
 // if ([!]Pv[.new]) memb(Rx++#s4:0)=Nt.new
 // if (Pv) memb(Rx++#s4:0)=Nt.new
 let mayStore = 1, hasCtrlDep = 1,
@@ -3086,21 +2558,6 @@ def STrih_nv_V4 : NVInst_V4<(outs), (ins MEMri:$addr, IntRegs:$src1),
             []>,
             Requires<[HasV4T]>;
 
-let mayStore = 1, isPredicable = 1 in
-def STrih_indexed_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, s11_1Imm:$src2, IntRegs:$src3),
-            "memh($src1+#$src2) = $src3.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
-def STrih_indexed_shl_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
-            "memh($src1+$src2<<#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
 // memh(Ru<<#u2+#U6)=Nt.new
 let mayStore = 1, AddedComplexity = 10 in
 def STrih_shl_nv_V4 : NVInst_V4<(outs),
@@ -3181,83 +2638,6 @@ def STrih_cdnNotPt_nv_V4 : NVInst_V4<(outs),
             []>,
             Requires<[HasV4T]>;
 
-// if (Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrih_indexed_cPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
-            "if ($src1) memh($src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrih_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
-            "if ($src1.new) memh($src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrih_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
-            "if (!$src1) memh($src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+#u6:1)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STrih_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_1Imm:$src3, IntRegs:$src4),
-            "if (!$src1.new) memh($src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if ([!]Pv[.new]) memh(Rs+Ru<<#u2)=Nt.new
-// if (Pv) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
-    isPredicated = 1 in
-def STrih_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if ($src1) memh($src2+$src3<<#$src4) = $src5.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
-    isPredicated = 1 in
-def STrih_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if ($src1.new) memh($src2+$src3<<#$src4) = $src5.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
-    isPredicated = 1 in
-def STrih_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if (!$src1) memh($src2+$src3<<#$src4) = $src5.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memh(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
-    isPredicated = 1 in
-def STrih_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if (!$src1.new) memh($src2+$src3<<#$src4) = $src5.new",
-            []>,
-            Requires<[HasV4T]>;
-
 // if ([!]Pv[]) memh(Rx++#s4:1)=Nt.new
 // if (Pv) memh(Rx++#s4:1)=Nt.new
 let mayStore = 1, hasCtrlDep = 1,
@@ -3307,21 +2687,6 @@ def STriw_nv_V4 : NVInst_V4<(outs),
             []>,
             Requires<[HasV4T]>;
 
-let mayStore = 1, isPredicable = 1 in
-def STriw_indexed_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, s11_2Imm:$src2, IntRegs:$src3),
-            "memw($src1+#$src2) = $src3.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10, isPredicable = 1 in
-def STriw_indexed_shl_nv_V4 : NVInst_V4<(outs),
-            (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$src3, IntRegs:$src4),
-            "memw($src1+$src2<<#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
 // memw(Ru<<#u2+#U6)=Nt.new
 let mayStore = 1, AddedComplexity = 10 in
 def STriw_shl_nv_V4 : NVInst_V4<(outs),
@@ -3399,84 +2764,6 @@ def STriw_cdnNotPt_nv_V4 : NVInst_V4<(outs),
             []>,
             Requires<[HasV4T]>;
 
-// if (Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STriw_indexed_cPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
-            "if ($src1) memw($src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STriw_indexed_cdnPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
-            "if ($src1.new) memw($src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STriw_indexed_cNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
-            "if (!$src1) memw($src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+#u6:2)=Nt.new
-let mayStore = 1, neverHasSideEffects = 1,
-    isPredicated = 1 in
-def STriw_indexed_cdnNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, u6_2Imm:$src3, IntRegs:$src4),
-            "if (!$src1.new) memw($src2+#$src3) = $src4.new",
-            []>,
-            Requires<[HasV4T]>;
-
-
-// if ([!]Pv[.new]) memw(Rs+Ru<<#u2)=Nt.new
-// if (Pv) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
-    isPredicated = 1 in
-def STriw_indexed_shl_cPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if ($src1) memw($src2+$src3<<#$src4) = $src5.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (Pv.new) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
-    isPredicated = 1 in
-def STriw_indexed_shl_cdnPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if ($src1.new) memw($src2+$src3<<#$src4) = $src5.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
-    isPredicated = 1 in
-def STriw_indexed_shl_cNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if (!$src1) memw($src2+$src3<<#$src4) = $src5.new",
-            []>,
-            Requires<[HasV4T]>;
-
-// if (!Pv.new) memw(Rs+Ru<<#u2)=Nt.new
-let mayStore = 1, AddedComplexity = 10,
-    isPredicated = 1 in
-def STriw_indexed_shl_cdnNotPt_nv_V4 : NVInst_V4<(outs),
-            (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$src4,
-                 IntRegs:$src5),
-            "if (!$src1.new) memw($src2+$src3<<#$src4) = $src5.new",
-            []>,
-            Requires<[HasV4T]>;
-
 // if ([!]Pv[.new]) memw(Rx++#s4:2)=Nt.new
 // if (Pv) memw(Rx++#s4:2)=Nt.new
 let mayStore = 1, hasCtrlDep = 1,
diff --git a/lib/Target/Hexagon/HexagonMCInst.h b/lib/Target/Hexagon/HexagonMCInst.h
index 7a16c241ff..e16636ea48 100644
--- a/lib/Target/Hexagon/HexagonMCInst.h
+++ b/lib/Target/Hexagon/HexagonMCInst.h
@@ -14,8 +14,8 @@
 #ifndef HEXAGONMCINST_H
 #define HEXAGONMCINST_H
 
-#include "llvm/MC/MCInst.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCInst.h"
 
 namespace llvm {
   class HexagonMCInst: public MCInst {
diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp
index 70bddcc76a..dc644ad213 100644
--- a/lib/Target/Hexagon/HexagonMCInstLower.cpp
+++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp
@@ -15,8 +15,8 @@
 #include "Hexagon.h"
 #include "HexagonAsmPrinter.h"
 #include "HexagonMachineFunctionInfo.h"
-#include "llvm/Constants.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Constants.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Target/Mangler.h"
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 0e9ef4838d..aef6830752 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -15,7 +15,6 @@
 #define DEBUG_TYPE "misched"
 
 #include "HexagonMachineScheduler.h"
-
 #include <queue>
 
 using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index fe0242a0f7..f68dadf292 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -14,6 +14,9 @@
 #ifndef HEXAGONASMPRINTER_H
 #define HEXAGONASMPRINTER_H
 
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/Passes.h"
@@ -22,14 +25,11 @@
 #include "llvm/CodeGen/ResourcePriorityQueue.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 1e91c39485..cd3d2898d0 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -22,31 +22,29 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "hexagon-nvj"
-#include "llvm/PassSupport.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-#include "HexagonRegisterInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonInstrInfo.h"
-#include "HexagonMachineFunctionInfo.h"
-
 #include <map>
-
-#include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
 STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created");
diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td
index 3f43d697fc..c79d78f210 100644
--- a/lib/Target/Hexagon/HexagonOperands.td
+++ b/lib/Target/Hexagon/HexagonOperands.td
@@ -465,3 +465,394 @@ def SetClr3ImmPred : PatLeaf<(i32 imm), [{
   int8_t v = (int8_t)N->getSExtValue();
   return (v >= 0 && v <= 7);
 }]>;
+
+
+// Extendable immediate operands.
+
+let PrintMethod = "printExtOperand" in {
+  def s16Ext : Operand<i32>;
+  def s12Ext : Operand<i32>;
+  def s10Ext : Operand<i32>;
+  def s9Ext : Operand<i32>;
+  def s8Ext : Operand<i32>;
+  def s6Ext : Operand<i32>;
+  def s11_0Ext : Operand<i32>;
+  def s11_1Ext : Operand<i32>;
+  def s11_2Ext : Operand<i32>;
+  def s11_3Ext : Operand<i32>;
+  def u6Ext : Operand<i32>;
+  def u7Ext : Operand<i32>;
+  def u8Ext : Operand<i32>;
+  def u9Ext : Operand<i32>;
+  def u10Ext : Operand<i32>;
+  def u6_0Ext : Operand<i32>;
+  def u6_1Ext : Operand<i32>;
+  def u6_2Ext : Operand<i32>;
+  def u6_3Ext : Operand<i32>;
+}
+
+let PrintMethod = "printImmOperand" in
+def u0AlwaysExt : Operand<i32>;
+
+// Predicates for constant extendable operands
+def s16ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 16-bit sign extended field.
+    return isInt<16>(v);
+  else {
+    if (isInt<16>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit signed field.
+    return isConstExtProfitable(Node) && isInt<32>(v);
+  }
+}]>;
+
+def s10ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 10-bit sign extended field.
+    return isInt<10>(v);
+  else {
+    if (isInt<10>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit signed field.
+    return isConstExtProfitable(Node) && isInt<32>(v);
+  }
+}]>;
+
+def s9ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 9-bit sign extended field.
+    return isInt<9>(v);
+  else {
+    if (isInt<9>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit unsigned field.
+    return isConstExtProfitable(Node) && isInt<32>(v);
+  }
+}]>;
+
+def s8ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 8-bit sign extended field.
+    return isInt<8>(v);
+  else {
+    if (isInt<8>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit signed field.
+    return isConstExtProfitable(Node) && isInt<32>(v);
+  }
+}]>;
+
+def s8_16ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate fits in a 8-bit sign extended field.
+    return isInt<8>(v);
+  else {
+    if (isInt<8>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can't fit in a 16-bit signed field. This is required to avoid
+    // unnecessary constant extenders.
+    return isConstExtProfitable(Node) && !isInt<16>(v);
+  }
+}]>;
+
+def s6ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 6-bit sign extended field.
+    return isInt<6>(v);
+  else {
+    if (isInt<6>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit unsigned field.
+    return isConstExtProfitable(Node) && isInt<32>(v);
+  }
+}]>;
+
+def s6_16ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate fits in a 6-bit sign extended field.
+    return isInt<6>(v);
+  else {
+    if (isInt<6>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can't fit in a 16-bit signed field. This is required to avoid
+    // unnecessary constant extenders.
+    return isConstExtProfitable(Node) && !isInt<16>(v);
+  }
+}]>;
+
+def s6_10ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 6-bit sign extended field.
+    return isInt<6>(v);
+  else {
+    if (isInt<6>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can't fit in a 10-bit signed field. This is required to avoid
+    // unnecessary constant extenders.
+    return isConstExtProfitable(Node) && !isInt<10>(v);
+  }
+}]>;
+
+def s11_0ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 11-bit sign extended field.
+    return isShiftedInt<11,0>(v);
+  else {
+    if (isInt<11>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit signed field.
+    return isConstExtProfitable(Node) && isInt<32>(v);
+  }
+}]>;
+
+def s11_1ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 12-bit sign extended field and
+    // is 2 byte aligned.
+    return isShiftedInt<11,1>(v);
+  else {
+    if (isInt<12>(v))
+      return isShiftedInt<11,1>(v);
+
+    // Return true if extending this immediate is profitable and the low 1 bit
+    // is zero (2-byte aligned).
+    return isConstExtProfitable(Node) && isInt<32>(v) && ((v % 2) == 0);
+  }
+}]>;
+
+def s11_2ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 13-bit sign extended field and
+    // is 4-byte aligned.
+    return isShiftedInt<11,2>(v);
+  else {
+    if (isInt<13>(v))
+      return isShiftedInt<11,2>(v);
+
+    // Return true if extending this immediate is profitable and the low 2-bits
+    // are zero (4-byte aligned).
+    return isConstExtProfitable(Node)  && isInt<32>(v) && ((v % 4) == 0);
+  }
+}]>;
+
+def s11_3ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 14-bit sign extended field and
+    // is 8-byte aligned.
+    return isShiftedInt<11,3>(v);
+  else {
+    if (isInt<14>(v))
+     return isShiftedInt<11,3>(v);
+
+    // Return true if extending this immediate is profitable and the low 3-bits
+    // are zero (8-byte aligned).
+    return isConstExtProfitable(Node)  && isInt<32>(v) && ((v % 8) == 0);
+  }
+}]>;
+
+def u0AlwaysExtPred : PatLeaf<(i32 imm), [{
+  // Predicate for an unsigned 32-bit value that always needs to be extended.
+  if (Subtarget.hasV4TOps()) {
+    if (isConstExtProfitable(Node)) {
+      int64_t v = (int64_t)N->getSExtValue();
+      return isUInt<32>(v);
+    }
+  }
+  return false;
+}]>;
+
+def u6ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 6-bit unsigned field.
+    return isUInt<6>(v);
+  else {
+    if (isUInt<6>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit unsigned field.
+    return isConstExtProfitable(Node) && isUInt<32>(v);
+  }
+}]>;
+
+def u7ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 7-bit unsigned field.
+    return isUInt<7>(v);
+  else {
+    if (isUInt<7>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit unsigned field.
+    return isConstExtProfitable(Node) && isUInt<32>(v);
+  }
+}]>;
+
+def u8ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 8-bit unsigned field.
+    return isUInt<8>(v);
+  else {
+    if (isUInt<8>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit unsigned field.
+    return isConstExtProfitable(Node) && isUInt<32>(v);
+  }
+}]>;
+
+def u9ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 9-bit unsigned field.
+    return isUInt<9>(v);
+  else {
+    if (isUInt<9>(v))
+      return true;
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit unsigned field.
+    return isConstExtProfitable(Node) && isUInt<32>(v);
+  }
+}]>;
+
+def u6_1ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 7-bit unsigned field and
+    // is 2-byte aligned.
+    return isShiftedUInt<6,1>(v);
+  else {
+    if (isUInt<7>(v))
+      return isShiftedUInt<6,1>(v);
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit unsigned field.
+    return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 2) == 0);
+  }
+}]>;
+
+def u6_2ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 8-bit unsigned field and
+    // is 4-byte aligned.
+    return isShiftedUInt<6,2>(v);
+  else {
+    if (isUInt<8>(v))
+      return isShiftedUInt<6,2>(v);
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit unsigned field.
+    return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 4) == 0);
+  }
+}]>;
+
+def u6_3ExtPred  : PatLeaf<(i32 imm), [{
+  int64_t v = (int64_t)N->getSExtValue();
+  if (!Subtarget.hasV4TOps())
+    // Return true if the immediate can fit in a 9-bit unsigned field and
+    // is 8-byte aligned.
+    return isShiftedUInt<6,3>(v);
+  else {
+    if (isUInt<9>(v))
+      return isShiftedUInt<6,3>(v);
+
+    // Return true if extending this immediate is profitable and the value
+    // can fit in a 32-bit unsigned field.
+    return isConstExtProfitable(Node) && isUInt<32>(v) && ((v % 8) == 0);
+  }
+}]>;
+
+// Addressing modes.
+
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex], []>;
+def ADDRriS11_0 : ComplexPattern<i32, 2, "SelectADDRriS11_0", [frameindex], []>;
+def ADDRriS11_1 : ComplexPattern<i32, 2, "SelectADDRriS11_1", [frameindex], []>;
+def ADDRriS11_2 : ComplexPattern<i32, 2, "SelectADDRriS11_2", [frameindex], []>;
+def ADDRriS11_3 : ComplexPattern<i32, 2, "SelectADDRriS11_3", [frameindex], []>;
+def ADDRriU6_0 : ComplexPattern<i32, 2, "SelectADDRriU6_0", [frameindex], []>;
+def ADDRriU6_1 : ComplexPattern<i32, 2, "SelectADDRriU6_1", [frameindex], []>;
+def ADDRriU6_2 : ComplexPattern<i32, 2, "SelectADDRriU6_2", [frameindex], []>;
+
+// Address operands.
+
+def MEMrr : Operand<i32> {
+  let PrintMethod = "printMEMrrOperand";
+  let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri : Operand<i32> {
+  let PrintMethod = "printMEMriOperand";
+  let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+def MEMri_s11_2 : Operand<i32>,
+  ComplexPattern<i32, 2, "SelectMEMriS11_2", []> {
+  let PrintMethod = "printMEMriOperand";
+  let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+def FrameIndex : Operand<i32> {
+  let PrintMethod = "printFrameIndexOperand";
+  let MIOperandInfo = (ops IntRegs, s11Imm);
+}
+
+let PrintMethod = "printGlobalOperand" in {
+  def globaladdress : Operand<i32>;
+  def globaladdressExt : Operand<i32>;
+}
+
+let PrintMethod = "printJumpTable" in
+def jumptablebase : Operand<i32>;
+
+def brtarget : Operand<OtherVT>;
+def brtargetExt : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+
+def bblabel : Operand<i32>;
+def bbl   : SDNode<"ISD::BasicBlock", SDTPtrLeaf   , [], "BasicBlockSDNode">;
+
+def symbolHi32 : Operand<i32> {
+  let PrintMethod = "printSymbolHi";
+}
+def symbolLo32 : Operand<i32> {
+  let PrintMethod = "printSymbolLo";
+}
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index a295015de5..7195c4a8d3 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -38,21 +38,21 @@
 #define DEBUG_TYPE "hexagon-peephole"
 #include "Hexagon.h"
 #include "HexagonTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/PassSupport.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include <algorithm>
 
 using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 3742486056..2985a5667b 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -14,25 +14,25 @@
 
 #include "HexagonRegisterInfo.h"
 #include "Hexagon.h"
+#include "HexagonMachineFunctionInfo.h"
 #include "HexagonSubtarget.h"
 #include "HexagonTargetMachine.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Function.h"
 #include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Type.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 8820d13e01..e8f3cfb6c3 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -15,8 +15,8 @@
 #ifndef HexagonREGISTERINFO_H
 #define HexagonREGISTERINFO_H
 
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/MC/MachineLocation.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 #define GET_REGINFO_HEADER
 #include "HexagonGenRegisterInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index 4d93dd18d4..a4445cb4b1 100644
--- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -12,11 +12,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "Hexagon.h"
 #include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/Transforms/Scalar.h"
 
 using namespace llvm;
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
index a81cd913a6..814249fa68 100644
--- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -27,24 +27,25 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "xfer"
-#include "HexagonTargetMachine.h"
-#include "HexagonSubtarget.h"
+#include "Hexagon.h"
 #include "HexagonMachineFunctionInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h
index 5d9d6d890d..76a8fba195 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/lib/Target/Hexagon/HexagonSubtarget.h
@@ -14,8 +14,8 @@
 #ifndef Hexagon_SUBTARGET_H
 #define Hexagon_SUBTARGET_H
 
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
 
 #define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 30866e9eeb..8fc836b458 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -15,13 +15,13 @@
 #include "Hexagon.h"
 #include "HexagonISelLowering.h"
 #include "HexagonMachineScheduler.h"
-#include "llvm/Module.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Module.h"
 #include "llvm/PassManager.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Scalar.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 7a4215c119..fa669a2719 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -14,13 +14,13 @@
 #ifndef HexagonTARGETMACHINE_H
 #define HexagonTARGETMACHINE_H
 
-#include "HexagonInstrInfo.h"
-#include "HexagonSubtarget.h"
+#include "HexagonFrameLowering.h"
 #include "HexagonISelLowering.h"
+#include "HexagonInstrInfo.h"
 #include "HexagonSelectionDAGInfo.h"
-#include "HexagonFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
+#include "HexagonSubtarget.h"
 #include "llvm/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetTransformImpl.h"
 
 namespace llvm {
diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index f4d7761ac3..2c1145f171 100644
--- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -14,13 +14,13 @@
 #include "HexagonTargetObjectFile.h"
 #include "HexagonSubtarget.h"
 #include "HexagonTargetMachine.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
 #include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/Support/ELF.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 3d5f685028..ab6f6cf899 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -17,36 +17,35 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "packets"
+#include "Hexagon.h"
+#include "HexagonMachineFunctionInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/DFAPacketizer.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "Hexagon.h"
-#include "HexagonTargetMachine.h"
-#include "HexagonRegisterInfo.h"
-#include "HexagonSubtarget.h"
-#include "HexagonMachineFunctionInfo.h"
-
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 #include <map>
 
 using namespace llvm;
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
index 035afe88d5..c700354bda 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -12,14 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "asm-printer"
+#include "HexagonInstPrinter.h"
 #include "Hexagon.h"
 #include "HexagonAsmPrinter.h"
-#include "HexagonInstPrinter.h"
 #include "HexagonMCInst.h"
-#include "llvm/MC/MCInst.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdio>
 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 3cfa4fddd8..737789bbb5 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -13,11 +13,11 @@
 
 #include "HexagonMCTargetDesc.h"
 #include "HexagonMCAsmInfo.h"
-#include "llvm/MC/MachineLocation.h"
 #include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
index 59a1ed97d3..480e79f9f5 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
@@ -8,17 +8,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/MBlazeBaseInfo.h"
-
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCTargetAsmLexer.h"
-
 #include "llvm/Support/TargetRegistry.h"
-
-#include <string>
 #include <map>
+#include <string>
 
 using namespace llvm;
 
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index f7809caeb3..2016600b9d 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -8,18 +8,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCTargetAsmParser.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Twine.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index 6b958c85ee..c9a46a7361 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -12,9 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MBlaze.h"
 #include "MBlazeDisassembler.h"
-
+#include "MBlaze.h"
 #include "llvm/MC/EDInstInfo.h"
 #include "llvm/MC/MCDisassembler.h"
 #include "llvm/MC/MCInst.h"
diff --git a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
index 586e2d3eef..bb2c31a33a 100644
--- a/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MBlaze/InstPrinter/CMakeLists.txt
@@ -5,4 +5,4 @@ add_llvm_library(LLVMMBlazeAsmPrinter
   MBlazeInstPrinter.cpp
   )
 
-add_dependencies(LLVMMBlazeAsmPrinter MBlazeCommonTableGen)
+add_dependencies(LLVMMBlazeAsmPrinter intrinsics_gen MBlazeCommonTableGen)
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
index a1f1dbc7a2..fc2b3d51b4 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
@@ -12,11 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "asm-printer"
-#include "MBlaze.h"
 #include "MBlazeInstPrinter.h"
-#include "llvm/MC/MCInst.h"
+#include "MBlaze.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 using namespace llvm;
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index b679a318c3..0f6f108aea 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -15,32 +15,32 @@
 #define DEBUG_TYPE "mblaze-asm-printer"
 
 #include "MBlaze.h"
-#include "MBlazeSubtarget.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
 #include "MBlazeInstrInfo.h"
-#include "MBlazeTargetMachine.h"
-#include "MBlazeMachineFunction.h"
 #include "MBlazeMCInstLower.h"
-#include "InstPrinter/MBlazeInstPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeTargetMachine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
 #include <cctype>
 
 using namespace llvm;
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index 19e787d862..3d0d1cecd1 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -16,14 +16,14 @@
 
 #include "MBlaze.h"
 #include "MBlazeTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/MBlaze/MBlazeFrameLowering.cpp b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
index 9e467bf337..99ffa413e6 100644
--- a/lib/Target/MBlaze/MBlazeFrameLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeFrameLowering.cpp
@@ -14,21 +14,21 @@
 #define DEBUG_TYPE "mblaze-frame-lowering"
 
 #include "MBlazeFrameLowering.h"
+#include "InstPrinter/MBlazeInstPrinter.h"
 #include "MBlazeInstrInfo.h"
 #include "MBlazeMachineFunction.h"
-#include "InstPrinter/MBlazeInstPrinter.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Function.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index 6b4349766f..98257a08d3 100644
--- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -17,21 +17,21 @@
 #include "MBlazeRegisterInfo.h"
 #include "MBlazeSubtarget.h"
 #include "MBlazeTargetMachine.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Type.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 310c25e839..06c150571d 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -15,13 +15,9 @@
 #define DEBUG_TYPE "mblaze-lower"
 #include "MBlazeISelLowering.h"
 #include "MBlazeMachineFunction.h"
+#include "MBlazeSubtarget.h"
 #include "MBlazeTargetMachine.h"
 #include "MBlazeTargetObjectFile.h"
-#include "MBlazeSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
 #include "llvm/CallingConv.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -30,6 +26,10 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index a01fab567c..f6b4095a93 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -17,8 +17,8 @@
 
 #include "MBlaze.h"
 #include "MBlazeSubtarget.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index b5025fc8ee..79449f73f7 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -12,15 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "MBlazeInstrInfo.h"
-#include "MBlazeTargetMachine.h"
 #include "MBlazeMachineFunction.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
 
 #define GET_INSTRINFO_CTOR
 #include "MBlazeGenInstrInfo.inc"
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 1c2e3b2661..5238ad0040 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -16,9 +16,9 @@
 #include "llvm/Function.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Type.h"
 #include <cstring>
 
 using namespace llvm;
@@ -104,7 +104,7 @@ Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
                                                 Type **Tys,
                                                 unsigned numTy) const {
   assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
-  AttrListPtr AList = getAttributes(M->getContext(),
+  AttributeSet AList = getAttributes(M->getContext(),
                                     (mblazeIntrinsic::ID) IntrID);
   return cast<Function>(M->getOrInsertFunction(getName(IntrID),
                                                getType(M->getContext(), IntrID),
diff --git a/lib/Target/MBlaze/MBlazeMCInstLower.cpp b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
index 6b9f42ec91..457c206fd0 100644
--- a/lib/Target/MBlaze/MBlazeMCInstLower.cpp
+++ b/lib/Target/MBlaze/MBlazeMCInstLower.cpp
@@ -14,19 +14,19 @@
 
 #include "MBlazeMCInstLower.h"
 #include "MBlazeInstrInfo.h"
-#include "llvm/Constants.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Constants.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/Target/Mangler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
 using namespace llvm;
 
 MCSymbol *MBlazeMCInstLower::
diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h
index 95cc5077cc..10d507f37b 100644
--- a/lib/Target/MBlaze/MBlazeMachineFunction.h
+++ b/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -16,8 +16,8 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 
 namespace llvm {
 
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index daa76e887f..7fa3b2f785 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -16,25 +16,25 @@
 
 #include "MBlazeRegisterInfo.h"
 #include "MBlaze.h"
-#include "MBlazeSubtarget.h"
 #include "MBlazeMachineFunction.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Constants.h"
-#include "llvm/Type.h"
 #include "llvm/Function.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Type.h"
 
 #define GET_REGINFO_TARGET_DESC
 #include "MBlazeGenRegisterInfo.inc"
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h
index eb375046f2..ed43d21f30 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.h
+++ b/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -14,8 +14,8 @@
 #ifndef MBLAZESUBTARGET_H
 #define MBLAZESUBTARGET_H
 
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
 
 #define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index f180652f11..70eb9bac6b 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -13,8 +13,8 @@
 
 #include "MBlazeTargetMachine.h"
 #include "MBlaze.h"
-#include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Target/TargetOptions.h"
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index a8df4e63e3..891a57bef5 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -14,16 +14,16 @@
 #ifndef MBLAZE_TARGETMACHINE_H
 #define MBLAZE_TARGETMACHINE_H
 
-#include "MBlazeSubtarget.h"
-#include "MBlazeInstrInfo.h"
+#include "MBlazeFrameLowering.h"
 #include "MBlazeISelLowering.h"
-#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeInstrInfo.h"
 #include "MBlazeIntrinsicInfo.h"
-#include "MBlazeFrameLowering.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Target/TargetMachine.h"
+#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeSubtarget.h"
 #include "llvm/DataLayout.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetTransformImpl.h"
 
 namespace llvm {
diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
index 899c74ee8e..8a5c13cb1b 100644
--- a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -9,14 +9,14 @@
 
 #include "MBlazeTargetObjectFile.h"
 #include "MBlazeSubtarget.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 void MBlazeTargetObjectFile::
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
index 44feeb49e7..bd2013dee3 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
@@ -8,9 +8,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCELFSymbolFlags.h"
 #include "llvm/MC/MCExpr.h"
@@ -18,7 +19,6 @@
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/ADT/Twine.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
index 2b71d9d3c8..8faff6ade4 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
@@ -12,16 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mccodeemitter"
-#include "MCTargetDesc/MBlazeBaseInfo.h"
 #include "MCTargetDesc/MBlazeMCTargetDesc.h"
+#include "MCTargetDesc/MBlazeBaseInfo.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCFixup.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
index 9a7549b0e7..380750d50f 100644
--- a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "MBlazeMCTargetDesc.h"
-#include "MBlazeMCAsmInfo.h"
 #include "InstPrinter/MBlazeInstPrinter.h"
+#include "MBlazeMCAsmInfo.h"
 #include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/Target/MSP430/InstPrinter/CMakeLists.txt b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
index 64ac994b7f..99457b924c 100644
--- a/lib/Target/MSP430/InstPrinter/CMakeLists.txt
+++ b/lib/Target/MSP430/InstPrinter/CMakeLists.txt
@@ -4,4 +4,4 @@ add_llvm_library(LLVMMSP430AsmPrinter
   MSP430InstPrinter.cpp
   )
 
-add_dependencies(LLVMMSP430AsmPrinter MSP430CommonTableGen)
+add_dependencies(LLVMMSP430AsmPrinter intrinsics_gen MSP430CommonTableGen)
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index 0930c453e9..4b12aeadd3 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -12,11 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "asm-printer"
-#include "MSP430.h"
 #include "MSP430InstPrinter.h"
-#include "llvm/MC/MCInst.h"
+#include "MSP430.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 using namespace llvm;
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index c455f6bc24..530e6aae92 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "MSP430MCTargetDesc.h"
-#include "MSP430MCAsmInfo.h"
 #include "InstPrinter/MSP430InstPrinter.h"
+#include "MSP430MCAsmInfo.h"
 #include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 86bc183c1b..4a08a1ff9f 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -14,26 +14,26 @@
 
 #define DEBUG_TYPE "asm-printer"
 #include "MSP430.h"
+#include "InstPrinter/MSP430InstPrinter.h"
 #include "MSP430InstrInfo.h"
 #include "MSP430MCInstLower.h"
 #include "MSP430TargetMachine.h"
-#include "InstPrinter/MSP430InstPrinter.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
+#include "llvm/Module.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index bdeb0c590f..f128427f80 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -18,11 +18,11 @@
 #define DEBUG_TYPE "msp430-branch-select"
 #include "MSP430.h"
 #include "MSP430InstrInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 STATISTIC(NumExpanded, "Number of branches expanded to long format");
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index 2e170f17bf..1b32688489 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -14,15 +14,15 @@
 #include "MSP430FrameLowering.h"
 #include "MSP430InstrInfo.h"
 #include "MSP430MachineFunctionInfo.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Function.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 5efc6a36b8..ff109615b1 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -13,22 +13,22 @@
 
 #include "MSP430.h"
 #include "MSP430TargetMachine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
 #include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index c79e5f181b..8a1bd0958b 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -16,14 +16,9 @@
 #include "MSP430ISelLowering.h"
 #include "MSP430.h"
 #include "MSP430MachineFunctionInfo.h"
-#include "MSP430TargetMachine.h"
 #include "MSP430Subtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
+#include "MSP430TargetMachine.h"
 #include "llvm/CallingConv.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -32,6 +27,11 @@
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index be332f05b3..c99f1f895e 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -15,10 +15,10 @@
 #include "MSP430.h"
 #include "MSP430MachineFunctionInfo.h"
 #include "MSP430TargetMachine.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Function.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 
diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp
index b1773fba7e..043e5becad 100644
--- a/lib/Target/MSP430/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/MSP430MCInstLower.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MSP430MCInstLower.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -20,10 +21,9 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
 using namespace llvm;
 
 MCSymbol *MSP430MCInstLower::
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 9ae238f66f..a2782e39d9 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -17,14 +17,14 @@
 #include "MSP430.h"
 #include "MSP430MachineFunctionInfo.h"
 #include "MSP430TargetMachine.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/Support/ErrorHandling.h"
 
 #define GET_REGINFO_TARGET_DESC
 #include "MSP430GenRegisterInfo.inc"
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 13e37b3735..062c119410 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -13,9 +13,9 @@
 
 #include "MSP430TargetMachine.h"
 #include "MSP430.h"
-#include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 186172ede4..8a94d746de 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -15,11 +15,11 @@
 #ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H
 #define LLVM_TARGET_MSP430_TARGETMACHINE_H
 
-#include "MSP430InstrInfo.h"
-#include "MSP430ISelLowering.h"
 #include "MSP430FrameLowering.h"
-#include "MSP430SelectionDAGInfo.h"
+#include "MSP430ISelLowering.h"
+#include "MSP430InstrInfo.h"
 #include "MSP430RegisterInfo.h"
+#include "MSP430SelectionDAGInfo.h"
 #include "MSP430Subtarget.h"
 #include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 539a1f723b..c3e83725d6 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -12,14 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/Mangler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
-#include "llvm/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
 using namespace llvm;
 
 static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) {
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 67b524883c..59fefa1268 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -13,11 +13,11 @@
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCTargetAsmParser.h"
 #include "llvm/Support/TargetRegistry.h"
 
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index 82dbcc5bcf..099a1e79a5 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -12,17 +12,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "Mips.h"
-#include "MipsSubtarget.h"
 #include "MipsRegisterInfo.h"
+#include "MipsSubtarget.h"
 #include "llvm/MC/EDInstInfo.h"
 #include "llvm/MC/MCDisassembler.h"
 #include "llvm/MC/MCFixedLenDisassembler.h"
-#include "llvm/Support/MemoryObject.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
 
+// Not a normal header, this must come last.
 #include "MipsGenEDInfo.inc"
 
 using namespace llvm;
diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt
index 3e9fbf1c55..c3f4a6e1be 100644
--- a/lib/Target/Mips/InstPrinter/CMakeLists.txt
+++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt
@@ -4,4 +4,4 @@ add_llvm_library(LLVMMipsAsmPrinter
   MipsInstPrinter.cpp
   )
 
-add_dependencies(LLVMMipsAsmPrinter MipsCommonTableGen)
+add_dependencies(LLVMMipsAsmPrinter intrinsics_gen MipsCommonTableGen)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index f253ef35d0..9242a0bcc9 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -254,7 +254,7 @@ public:
 class NaClMipsAsmBackend : public MipsAsmBackend {
 public:
   NaClMipsAsmBackend(const Target &T, bool _is64Bit)
-    : MipsAsmBackend(T, Triple::NativeClient, /* IsLittle */ true, _is64Bit) {}
+    : MipsAsmBackend(T, Triple::NaCl, /* IsLittle */ true, _is64Bit) {}
 
   unsigned getBundleSize() const {
     return 16;
@@ -300,4 +300,3 @@ MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T, StringRef TT,
   return new MipsAsmBackend(T, Triple(TT).getOS(),
                             /*IsLittle*/false, /*Is64Bit*/true);
 }
-
diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 94e0d20d88..7a55efd5c3 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -121,99 +121,6 @@ namespace MipsII {
   };
 }
 
-
-/// getMipsRegisterNumbering - Given the enum value for some register,
-/// return the number that it corresponds to.
-inline static unsigned getMipsRegisterNumbering(unsigned RegEnum)
-{
-  switch (RegEnum) {
-  case Mips::ZERO: case Mips::ZERO_64: case Mips::F0: case Mips::D0_64:
-  case Mips::D0:   case Mips::FCC0:    case Mips::AC0:
-    return 0;
-  case Mips::AT: case Mips::AT_64: case Mips::F1: case Mips::D1_64:
-  case Mips::AC1:
-    return 1;
-  case Mips::V0: case Mips::V0_64: case Mips::F2: case Mips::D2_64:
-  case Mips::D1: case Mips::AC2:
-    return 2;
-  case Mips::V1: case Mips::V1_64: case Mips::F3: case Mips::D3_64:
-  case Mips::AC3:
-    return 3;
-  case Mips::A0: case Mips::A0_64: case Mips::F4: case Mips::D4_64:
-  case Mips::D2:
-    return 4;
-  case Mips::A1: case Mips::A1_64: case Mips::F5: case Mips::D5_64:
-    return 5;
-  case Mips::A2: case Mips::A2_64: case Mips::F6: case Mips::D6_64:
-  case Mips::D3:
-    return 6;
-  case Mips::A3: case Mips::A3_64: case Mips::F7: case Mips::D7_64:
-    return 7;
-  case Mips::T0: case Mips::T0_64: case Mips::F8: case Mips::D8_64:
-  case Mips::D4:
-    return 8;
-  case Mips::T1: case Mips::T1_64: case Mips::F9: case Mips::D9_64:
-    return 9;
-  case Mips::T2: case Mips::T2_64: case Mips::F10: case Mips::D10_64:
-  case Mips::D5:
-    return 10;
-  case Mips::T3: case Mips::T3_64: case Mips::F11: case Mips::D11_64:
-    return 11;
-  case Mips::T4: case Mips::T4_64: case Mips::F12: case Mips::D12_64:
-  case Mips::D6:
-    return 12;
-  case Mips::T5: case Mips::T5_64: case Mips::F13: case Mips::D13_64:
-    return 13;
-  case Mips::T6: case Mips::T6_64: case Mips::F14: case Mips::D14_64:
-  case Mips::D7:
-    return 14;
-  case Mips::T7: case Mips::T7_64: case Mips::F15: case Mips::D15_64:
-    return 15;
-  case Mips::S0: case Mips::S0_64: case Mips::F16: case Mips::D16_64:
-  case Mips::D8:
-    return 16;
-  case Mips::S1: case Mips::S1_64: case Mips::F17: case Mips::D17_64:
-    return 17;
-  case Mips::S2: case Mips::S2_64: case Mips::F18: case Mips::D18_64:
-  case Mips::D9:
-    return 18;
-  case Mips::S3: case Mips::S3_64: case Mips::F19: case Mips::D19_64:
-    return 19;
-  case Mips::S4: case Mips::S4_64: case Mips::F20: case Mips::D20_64:
-  case Mips::D10:
-    return 20;
-  case Mips::S5: case Mips::S5_64: case Mips::F21: case Mips::D21_64:
-    return 21;
-  case Mips::S6: case Mips::S6_64: case Mips::F22: case Mips::D22_64:
-  case Mips::D11:
-    return 22;
-  case Mips::S7: case Mips::S7_64: case Mips::F23: case Mips::D23_64:
-    return 23;
-  case Mips::T8: case Mips::T8_64: case Mips::F24: case Mips::D24_64:
-  case Mips::D12:
-    return 24;
-  case Mips::T9: case Mips::T9_64: case Mips::F25: case Mips::D25_64:
-    return 25;
-  case Mips::K0: case Mips::K0_64: case Mips::F26: case Mips::D26_64:
-  case Mips::D13:
-    return 26;
-  case Mips::K1: case Mips::K1_64: case Mips::F27: case Mips::D27_64:
-    return 27;
-  case Mips::GP: case Mips::GP_64: case Mips::F28: case Mips::D28_64:
-  case Mips::D14:
-    return 28;
-  case Mips::SP: case Mips::SP_64: case Mips::F29: case Mips::D29_64:
-  case Mips::HWR29:
-    return 29;
-  case Mips::FP: case Mips::FP_64: case Mips::F30: case Mips::D30_64:
-  case Mips::D15:
-    return 30;
-  case Mips::RA: case Mips::RA_64: case Mips::F31: case Mips::D31_64:
-    return 31;
-  default: llvm_unreachable("Unknown register number!");
-  }
-}
-
 inline static std::pair<const MCSymbolRefExpr*, int64_t>
 MipsGetSymAndOffset(const MCFixup &Fixup) {
   MCFixupKind FixupKind = Fixup.getKind();
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index da1e4552c9..4b68b7e6fe 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -33,11 +34,12 @@ class MipsMCCodeEmitter : public MCCodeEmitter {
   MipsMCCodeEmitter(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
   void operator=(const MipsMCCodeEmitter &) LLVM_DELETED_FUNCTION;
   const MCInstrInfo &MCII;
+  MCContext &Ctx;
   bool IsLittleEndian;
 
 public:
-  MipsMCCodeEmitter(const MCInstrInfo &mcii, bool IsLittle) :
-            MCII(mcii), IsLittleEndian(IsLittle) {}
+  MipsMCCodeEmitter(const MCInstrInfo &mcii, MCContext &Ctx_, bool IsLittle) :
+    MCII(mcii), Ctx(Ctx_), IsLittleEndian(IsLittle) {}
 
   ~MipsMCCodeEmitter() {}
 
@@ -93,7 +95,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx)
 {
-  return new MipsMCCodeEmitter(MCII, false);
+  return new MipsMCCodeEmitter(MCII, Ctx, false);
 }
 
 MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
@@ -101,7 +103,7 @@ MCCodeEmitter *llvm::createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
                                                const MCSubtargetInfo &STI,
                                                MCContext &Ctx)
 {
-  return new MipsMCCodeEmitter(MCII, true);
+  return new MipsMCCodeEmitter(MCII, Ctx, true);
 }
 
 /// EncodeInstruction - Emit the instruction.
@@ -200,7 +202,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
                   SmallVectorImpl<MCFixup> &Fixups) const {
   if (MO.isReg()) {
     unsigned Reg = MO.getReg();
-    unsigned RegNo = getMipsRegisterNumbering(Reg);
+    unsigned RegNo = Ctx.getRegisterInfo().getEncodingValue(Reg);
     return RegNo;
   } else if (MO.isImm()) {
     return static_cast<unsigned>(MO.getImm());
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index f634f082be..936097137d 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -11,15 +11,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MipsMCAsmInfo.h"
 #include "MipsMCTargetDesc.h"
 #include "InstPrinter/MipsInstPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "MipsMCAsmInfo.h"
 #include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 31194ae610..23e2a948a5 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -52,10 +52,6 @@ def FeatureSEInReg     : SubtargetFeature<"seinreg", "HasSEInReg", "true",
                                 "Enable 'signext in register' instructions.">;
 def FeatureCondMov     : SubtargetFeature<"condmov", "HasCondMov", "true",
                                 "Enable 'conditional move' instructions.">;
-def FeatureMulDivAdd   : SubtargetFeature<"muldivadd", "HasMulDivAdd", "true",
-                                "Enable 'multiply add/sub' instructions.">;
-def FeatureMinMax      : SubtargetFeature<"minmax", "HasMinMax", "true",
-                                "Enable 'min/max' instructions.">;
 def FeatureSwap        : SubtargetFeature<"swap", "HasSwap", "true",
                                 "Enable 'byte/half swap' instructions.">;
 def FeatureBitCount    : SubtargetFeature<"bitcount", "HasBitCount", "true",
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index 4e6b21feb5..d17c1259e5 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -12,17 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "Mips16FrameLowering.h"
-#include "MipsInstrInfo.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/Function.h"
+#include "MipsInstrInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Function.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 619646b317..10b696fe87 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -12,15 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "Mips16InstrInfo.h"
-#include "MipsTargetMachine.h"
-#include "MipsMachineFunction.h"
 #include "InstPrinter/MipsInstPrinter.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index e06ccfe61c..d6ef8d2bf1 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -14,8 +14,8 @@
 #ifndef MIPS16INSTRUCTIONINFO_H
 #define MIPS16INSTRUCTIONINFO_H
 
-#include "MipsInstrInfo.h"
 #include "Mips16RegisterInfo.h"
+#include "MipsInstrInfo.h"
 
 namespace llvm {
 
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index 5defc75ea6..1e560c6049 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -1539,4 +1539,4 @@ def : Wrapper16Pat<tglobaltlsaddr, AddiuRxRxImmX16, CPU16Regs>;
 def : Mips16Pat<(i32 (extloadi8   addr16:$src)),
                 (LbuRxRyOffMemX16  addr16:$src)>;
 def : Mips16Pat<(i32 (extloadi16  addr16:$src)),
-                (LhuRxRyOffMemX16  addr16:$src)>;
-\ No newline at end of file
+                (LhuRxRyOffMemX16  addr16:$src)>;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index d7397a32f0..6b87ecb08c 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -12,30 +12,30 @@
 //===----------------------------------------------------------------------===//
 
 #include "Mips16RegisterInfo.h"
-#include "Mips16InstrInfo.h"
 #include "Mips.h"
+#include "Mips16InstrInfo.h"
 #include "MipsAnalyzeImmediate.h"
 #include "MipsInstrInfo.h"
-#include "MipsSubtarget.h"
 #include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Constants.h"
 #include "llvm/DebugInfo.h"
-#include "llvm/Type.h"
 #include "llvm/Function.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Type.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 83322eac8c..1c852e2fc4 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -50,23 +50,23 @@ class Div64<SDNode op, bits<6> func, string instr_asm, InstrItinClass itin>:
 
 multiclass Atomic2Ops64<PatFrag Op, string Opstr> {
   def #NAME# : Atomic2Ops<Op, Opstr, CPU64Regs, CPURegs>,
-               Requires<[NotN64, HasStandardEncoding]>;
+               Requires<[NotN64, HasStdEnc]>;
   def _P8    : Atomic2Ops<Op, Opstr, CPU64Regs, CPU64Regs>,
-               Requires<[IsN64, HasStandardEncoding]> {
+               Requires<[IsN64, HasStdEnc]> {
     let isCodeGenOnly = 1;
   }
 }
 
 multiclass AtomicCmpSwap64<PatFrag Op, string Width>  {
   def #NAME# : AtomicCmpSwap<Op, Width, CPU64Regs, CPURegs>,
-               Requires<[NotN64, HasStandardEncoding]>;
+               Requires<[NotN64, HasStdEnc]>;
   def _P8    : AtomicCmpSwap<Op, Width, CPU64Regs, CPU64Regs>,
-               Requires<[IsN64, HasStandardEncoding]> {
+               Requires<[IsN64, HasStdEnc]> {
     let isCodeGenOnly = 1;
   }
 }
 }
-let usesCustomInserter = 1, Predicates = [HasMips64, HasStandardEncoding],
+let usesCustomInserter = 1, Predicates = [HasStdEnc],
   DecoderNamespace = "Mips64" in {
   defm ATOMIC_LOAD_ADD_I64  : Atomic2Ops64<atomic_load_add_64, "load_add_64">;
   defm ATOMIC_LOAD_SUB_I64  : Atomic2Ops64<atomic_load_sub_64, "load_sub_64">;
@@ -119,7 +119,7 @@ let Pattern = []<dag> in {
 }
 }
 // Rotate Instructions
-let Predicates = [HasMips64r2, HasStandardEncoding],
+let Predicates = [HasMips64r2, HasStdEnc],
     DecoderNamespace = "Mips64" in {
   def DROTR    : shift_rotate_imm64<0x3a, 0x01, "drotr", rotr>;
   def DROTRV   : shift_rotate_reg<0x16, 0x01, "drotrv", rotr, CPU64Regs>;
@@ -154,15 +154,15 @@ defm SDR   : StoreLeftRightM64<0x2d, "sdr", MipsSDR>;
 
 /// Load-linked, Store-conditional
 def LLD    : LLBase<0x34, "lld", CPU64Regs, mem>,
-             Requires<[NotN64, HasStandardEncoding]>;
+             Requires<[NotN64, HasStdEnc]>;
 def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>,
-             Requires<[IsN64, HasStandardEncoding]> {
+             Requires<[IsN64, HasStdEnc]> {
   let isCodeGenOnly = 1;
 }
 def SCD    : SCBase<0x3c, "scd", CPU64Regs, mem>,
-             Requires<[NotN64, HasStandardEncoding]>;
+             Requires<[NotN64, HasStdEnc]>;
 def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>,
-             Requires<[IsN64, HasStandardEncoding]> {
+             Requires<[IsN64, HasStdEnc]> {
   let isCodeGenOnly = 1;
 }
 
@@ -205,9 +205,6 @@ def DSHD : SubwordSwap<0x24, 0x5, "dshd", CPU64Regs>;
 
 def LEA_ADDiu64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>;
 }
-let Uses = [SP_64], DecoderNamespace = "Mips64" in
-def DynAlloc64 : EffectiveAddress<0x19,"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>,
-                 Requires<[IsN64, HasStandardEncoding]>;
 let DecoderNamespace = "Mips64" in {
 def RDHWR64 : ReadHardware<CPU64Regs, HWRegs64>;
 
@@ -236,13 +233,13 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
 //===----------------------------------------------------------------------===//
 
 // extended loads
-let Predicates = [NotN64, HasStandardEncoding] in {
+let Predicates = [NotN64, HasStdEnc] in {
   def : MipsPat<(i64 (extloadi1  addr:$src)), (LB64 addr:$src)>;
   def : MipsPat<(i64 (extloadi8  addr:$src)), (LB64 addr:$src)>;
   def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>;
   def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>;
 }
-let Predicates = [IsN64, HasStandardEncoding] in {
+let Predicates = [IsN64, HasStdEnc] in {
   def : MipsPat<(i64 (extloadi1  addr:$src)), (LB64_P8 addr:$src)>;
   def : MipsPat<(i64 (extloadi8  addr:$src)), (LB64_P8 addr:$src)>;
   def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64_P8 addr:$src)>;
@@ -293,14 +290,10 @@ defm : SetgtPats<CPU64Regs, SLT64, SLTu64>;
 defm : SetgePats<CPU64Regs, SLT64, SLTu64>;
 defm : SetgeImmPats<CPU64Regs, SLTi64, SLTiu64>;
 
-// select MipsDynAlloc
-def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>,
-      Requires<[IsN64, HasStandardEncoding]>;
-
 // truncate
 def : MipsPat<(i32 (trunc CPU64Regs:$src)),
               (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>,
-      Requires<[IsN64, HasStandardEncoding]>;
+      Requires<[IsN64, HasStdEnc]>;
 
 // 32-to-64-bit extension
 def : MipsPat<(i64 (anyext CPURegs:$src)), (SLL64_32 CPURegs:$src)>;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 3611828cdc..8269ec6a61 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -13,12 +13,12 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mips-asm-printer"
-#include "Mips.h"
 #include "MipsAsmPrinter.h"
-#include "MipsInstrInfo.h"
-#include "MipsMCInstLower.h"
 #include "InstPrinter/MipsInstPrinter.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsMCInstLower.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
@@ -28,16 +28,16 @@
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/DataLayout.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetOptions.h"
 
@@ -139,7 +139,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
     if (Mips::CPURegsRegClass.contains(Reg))
       break;
 
-    unsigned RegNum = getMipsRegisterNumbering(Reg);
+    unsigned RegNum = TM.getRegisterInfo()->getEncodingValue(Reg);
     if (Mips::AFGR64RegClass.contains(Reg)) {
       FPUBitmask |= (3 << RegNum);
       CSFPRegsSize += AFGR64RegSize;
@@ -154,7 +154,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
   // Set CPU Bitmask.
   for (; i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
-    unsigned RegNum = getMipsRegisterNumbering(Reg);
+    unsigned RegNum = TM.getRegisterInfo()->getEncodingValue(Reg);
     CPUBitmask |= (1 << RegNum);
   }
 
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index efed6357a4..8ade92a566 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -14,8 +14,8 @@
 #ifndef MIPSASMPRINTER_H
 #define MIPSASMPRINTER_H
 
-#include "MipsMachineFunction.h"
 #include "MipsMCInstLower.h"
+#include "MipsMachineFunction.h"
 #include "MipsSubtarget.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/Support/Compiler.h"
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index 4bfccd8fdd..932e6b3d1e 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -14,11 +14,11 @@
 
 #define DEBUG_TYPE "jit"
 #include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
 #include "MipsInstrInfo.h"
 #include "MipsRelocations.h"
 #include "MipsSubtarget.h"
 #include "MipsTargetMachine.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -85,7 +85,7 @@ class MipsCodeEmitter : public MachineFunctionPass {
 
   private:
 
-    void emitWordLE(unsigned Word);
+    void emitWord(unsigned Word);
 
     /// Routines that handle operands which add machine relocations which are
     /// fixed up by the relocation stage.
@@ -112,12 +112,6 @@ class MipsCodeEmitter : public MachineFunctionPass {
     unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
     unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
 
-    int emitULW(const MachineInstr &MI);
-    int emitUSW(const MachineInstr &MI);
-    int emitULH(const MachineInstr &MI);
-    int emitULHu(const MachineInstr &MI);
-    int emitUSH(const MachineInstr &MI);
-
     void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc,
                                     int Offset) const;
   };
@@ -133,7 +127,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
   MCPEs = &MF.getConstantPool()->getConstants();
   MJTEs = 0;
   if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
-  JTI->Initialize(MF, IsPIC);
+  JTI->Initialize(MF, IsPIC, Subtarget->isLittle());
   MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ());
 
   do {
@@ -215,7 +209,7 @@ unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI,
 unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                             const MachineOperand &MO) const {
   if (MO.isReg())
-    return getMipsRegisterNumbering(MO.getReg());
+    return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
   else if (MO.isImm())
     return static_cast<unsigned>(MO.getImm());
   else if (MO.isGlobal())
@@ -271,103 +265,6 @@ void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
                                              Reloc, BB));
 }
 
-int MipsCodeEmitter::emitUSW(const MachineInstr &MI) {
-  unsigned src = getMachineOpValue(MI, MI.getOperand(0));
-  unsigned base = getMachineOpValue(MI, MI.getOperand(1));
-  unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
-  // swr src, offset(base)
-  // swl src, offset+3(base)
-  MCE.emitWordLE(
-    (0x2e << 26) | (base << 21) | (src << 16) | (offset & 0xffff));
-  MCE.emitWordLE(
-    (0x2a << 26) | (base << 21) | (src << 16) | ((offset+3) & 0xffff));
-  return 2;
-}
-
-int MipsCodeEmitter::emitULW(const MachineInstr &MI) {
-  unsigned dst = getMachineOpValue(MI, MI.getOperand(0));
-  unsigned base = getMachineOpValue(MI, MI.getOperand(1));
-  unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
-  unsigned at = 1;
-  if (dst != base) {
-    // lwr dst, offset(base)
-    // lwl dst, offset+3(base)
-    MCE.emitWordLE(
-      (0x26 << 26) | (base << 21) | (dst << 16) | (offset & 0xffff));
-    MCE.emitWordLE(
-      (0x22 << 26) | (base << 21) | (dst << 16) | ((offset+3) & 0xffff));
-    return 2;
-  } else {
-    // lwr at, offset(base)
-    // lwl at, offset+3(base)
-    // addu dst, at, $zero
-    MCE.emitWordLE(
-      (0x26 << 26) | (base << 21) | (at << 16) | (offset & 0xffff));
-    MCE.emitWordLE(
-      (0x22 << 26) | (base << 21) | (at << 16) | ((offset+3) & 0xffff));
-    MCE.emitWordLE(
-      (0x0 << 26) | (at << 21) | (0x0 << 16) | (dst << 11) | (0x0 << 6) | 0x21);
-    return 3;
-  }
-}
-
-int MipsCodeEmitter::emitUSH(const MachineInstr &MI) {
-  unsigned src = getMachineOpValue(MI, MI.getOperand(0));
-  unsigned base = getMachineOpValue(MI, MI.getOperand(1));
-  unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
-  unsigned at = 1;
-  // sb src, offset(base)
-  // srl at,src,8
-  // sb at, offset+1(base)
-  MCE.emitWordLE(
-    (0x28 << 26) | (base << 21) | (src << 16) | (offset & 0xffff));
-  MCE.emitWordLE(
-    (0x0 << 26) | (0x0 << 21) | (src << 16) | (at << 11) | (0x8 << 6) | 0x2);
-  MCE.emitWordLE(
-    (0x28 << 26) | (base << 21) | (at << 16) | ((offset+1) & 0xffff));
-  return 3;
-}
-
-int MipsCodeEmitter::emitULH(const MachineInstr &MI) {
-  unsigned dst = getMachineOpValue(MI, MI.getOperand(0));
-  unsigned base = getMachineOpValue(MI, MI.getOperand(1));
-  unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
-  unsigned at = 1;
-  // lbu at, offset(base)
-  // lb dst, offset+1(base)
-  // sll dst,dst,8
-  // or dst,dst,at
-  MCE.emitWordLE(
-    (0x24 << 26) | (base << 21) | (at << 16) | (offset & 0xffff));
-  MCE.emitWordLE(
-    (0x20 << 26) | (base << 21) | (dst << 16) | ((offset+1) & 0xffff));
-  MCE.emitWordLE(
-    (0x0 << 26) | (0x0 << 21) | (dst << 16) | (dst << 11) | (0x8 << 6) | 0x0);
-  MCE.emitWordLE(
-    (0x0 << 26) | (dst << 21) | (at << 16) | (dst << 11) | (0x0 << 6) | 0x25);
-  return 4;
-}
-
-int MipsCodeEmitter::emitULHu(const MachineInstr &MI) {
-  unsigned dst = getMachineOpValue(MI, MI.getOperand(0));
-  unsigned base = getMachineOpValue(MI, MI.getOperand(1));
-  unsigned offset = getMachineOpValue(MI, MI.getOperand(2));
-  unsigned at = 1;
-  // lbu at, offset(base)
-  // lbu dst, offset+1(base)
-  // sll dst,dst,8
-  // or dst,dst,at
-  MCE.emitWordLE(
-    (0x24 << 26) | (base << 21) | (at << 16) | (offset & 0xffff));
-  MCE.emitWordLE(
-    (0x24 << 26) | (base << 21) | (dst << 16) | ((offset+1) & 0xffff));
-  MCE.emitWordLE(
-    (0x0 << 26) | (0x0 << 21) | (dst << 16) | (dst << 11) | (0x8 << 6) | 0x0);
-  MCE.emitWordLE(
-    (0x0 << 26) | (dst << 21) | (at << 16) | (dst << 11) | (0x0 << 6) | 0x25);
-  return 4;
-}
-
 void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
   DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
 
@@ -377,16 +274,19 @@ void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) {
   if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo)
     return;
 
-  emitWordLE(getBinaryCodeForInstr(MI));
+  emitWord(getBinaryCodeForInstr(MI));
   ++NumEmitted;  // Keep track of the # of mi's emitted
 
   MCE.processDebugLoc(MI.getDebugLoc(), false);
 }
 
-void MipsCodeEmitter::emitWordLE(unsigned Word) {
+void MipsCodeEmitter::emitWord(unsigned Word) {
   DEBUG(errs() << "  0x";
         errs().write_hex(Word) << "\n");
-  MCE.emitWordLE(Word);
+  if (Subtarget->isLittle())
+    MCE.emitWordLE(Word);
+  else
+    MCE.emitWordBE(Word);
 }
 
 /// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index b12b1f2b5a..457d238a01 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -107,7 +107,7 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
 
 // Instantiation of instructions.
 def MOVZ_I_I     : CondMovIntInt<CPURegs, CPURegs, 0x0a, "movz">;
-let Predicates = [HasMips64, HasStandardEncoding],
+let Predicates = [HasStdEnc],
                   DecoderNamespace = "Mips64" in {
   def MOVZ_I_I64   : CondMovIntInt<CPURegs, CPU64Regs, 0x0a, "movz">;
   def MOVZ_I64_I   : CondMovIntInt<CPU64Regs, CPURegs, 0x0a, "movz"> {
@@ -119,7 +119,7 @@ let Predicates = [HasMips64, HasStandardEncoding],
 }
 
 def MOVN_I_I     : CondMovIntInt<CPURegs, CPURegs, 0x0b, "movn">;
-let Predicates = [HasMips64, HasStandardEncoding],
+let Predicates = [HasStdEnc],
                   DecoderNamespace = "Mips64" in {
   def MOVN_I_I64   : CondMovIntInt<CPURegs, CPU64Regs, 0x0b, "movn">;
   def MOVN_I64_I   : CondMovIntInt<CPU64Regs, CPURegs, 0x0b, "movn"> {
@@ -132,21 +132,21 @@ let Predicates = [HasMips64, HasStandardEncoding],
 
 def MOVZ_I_S   : CondMovIntFP<CPURegs, FGR32, 16, 18, "movz.s">;
 def MOVZ_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 18, "movz.s">,
-                 Requires<[HasMips64, HasStandardEncoding]> {
+                 Requires<[HasMips64, HasStdEnc]> {
   let DecoderNamespace = "Mips64";
 }
 
 def MOVN_I_S   : CondMovIntFP<CPURegs, FGR32, 16, 19, "movn.s">;
 def MOVN_I64_S : CondMovIntFP<CPU64Regs, FGR32, 16, 19, "movn.s">,
-                 Requires<[HasMips64, HasStandardEncoding]> {
+                 Requires<[HasMips64, HasStdEnc]> {
   let DecoderNamespace = "Mips64";
 }
 
-let Predicates = [NotFP64bit, HasStandardEncoding] in {
+let Predicates = [NotFP64bit, HasStdEnc] in {
   def MOVZ_I_D32   : CondMovIntFP<CPURegs, AFGR64, 17, 18, "movz.d">;
   def MOVN_I_D32   : CondMovIntFP<CPURegs, AFGR64, 17, 19, "movn.d">;
 }
-let Predicates = [IsFP64bit, HasStandardEncoding],
+let Predicates = [IsFP64bit, HasStdEnc],
                   DecoderNamespace = "Mips64" in {
   def MOVZ_I_D64   : CondMovIntFP<CPURegs, FGR64, 17, 18, "movz.d">;
   def MOVZ_I64_D64 : CondMovIntFP<CPU64Regs, FGR64, 17, 18, "movz.d"> {
@@ -160,24 +160,24 @@ let Predicates = [IsFP64bit, HasStandardEncoding],
 
 def MOVT_I   : CondMovFPInt<CPURegs, MipsCMovFP_T, 1, "movt">;
 def MOVT_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_T, 1, "movt">,
-               Requires<[HasMips64, HasStandardEncoding]> {
+               Requires<[HasMips64, HasStdEnc]> {
   let DecoderNamespace = "Mips64";
 }
 
 def MOVF_I   : CondMovFPInt<CPURegs, MipsCMovFP_F, 0, "movf">;
 def MOVF_I64 : CondMovFPInt<CPU64Regs, MipsCMovFP_F, 0, "movf">,
-               Requires<[HasMips64, HasStandardEncoding]> {
+               Requires<[HasMips64, HasStdEnc]> {
   let DecoderNamespace = "Mips64";
 }
 
 def MOVT_S : CondMovFPFP<FGR32, MipsCMovFP_T, 16, 1, "movt.s">;
 def MOVF_S : CondMovFPFP<FGR32, MipsCMovFP_F, 16, 0, "movf.s">;
 
-let Predicates = [NotFP64bit, HasStandardEncoding] in {
+let Predicates = [NotFP64bit, HasStdEnc] in {
   def MOVT_D32 : CondMovFPFP<AFGR64, MipsCMovFP_T, 17, 1, "movt.d">;
   def MOVF_D32 : CondMovFPFP<AFGR64, MipsCMovFP_F, 17, 0, "movf.d">;
 }
-let Predicates = [IsFP64bit, HasStandardEncoding],
+let Predicates = [IsFP64bit, HasStdEnc],
     DecoderNamespace = "Mips64" in {
   def MOVT_D64 : CondMovFPFP<FGR64, MipsCMovFP_T, 17, 1, "movt.d">;
   def MOVF_D64 : CondMovFPFP<FGR64, MipsCMovFP_F, 17, 0, "movf.d">;
@@ -187,7 +187,7 @@ let Predicates = [IsFP64bit, HasStandardEncoding],
 defm : MovzPats0<CPURegs, CPURegs, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>;
 defm : MovzPats1<CPURegs, CPURegs, MOVZ_I_I, XOR>;
 defm : MovzPats2<CPURegs, CPURegs, MOVZ_I_I, XORi>;
-let Predicates = [HasMips64, HasStandardEncoding] in {
+let Predicates = [HasMips64, HasStdEnc] in {
   defm : MovzPats0<CPURegs, CPU64Regs, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>;
   defm : MovzPats0<CPU64Regs, CPURegs, MOVZ_I_I, SLT64, SLTu64, SLTi64,
                    SLTiu64>;
@@ -202,7 +202,7 @@ let Predicates = [HasMips64, HasStandardEncoding] in {
 }
 
 defm : MovnPats<CPURegs, CPURegs, MOVN_I_I, XOR>;
-let Predicates = [HasMips64, HasStandardEncoding] in {
+let Predicates = [HasMips64, HasStdEnc] in {
   defm : MovnPats<CPURegs, CPU64Regs, MOVN_I_I64, XOR>;
   defm : MovnPats<CPU64Regs, CPURegs, MOVN_I64_I, XOR64>;
   defm : MovnPats<CPU64Regs, CPU64Regs, MOVN_I64_I64, XOR64>;
@@ -211,19 +211,19 @@ let Predicates = [HasMips64, HasStandardEncoding] in {
 defm : MovzPats0<CPURegs, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>;
 defm : MovzPats1<CPURegs, FGR32, MOVZ_I_S, XOR>;
 defm : MovnPats<CPURegs, FGR32, MOVN_I_S, XOR>;
-let Predicates = [HasMips64, HasStandardEncoding] in {
+let Predicates = [HasMips64, HasStdEnc] in {
   defm : MovzPats0<CPU64Regs, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64,
                    SLTiu64>;
   defm : MovzPats1<CPU64Regs, FGR32, MOVZ_I64_S, XOR64>;
   defm : MovnPats<CPU64Regs, FGR32, MOVN_I64_S, XOR64>;
 }
 
-let Predicates = [NotFP64bit, HasStandardEncoding] in {
+let Predicates = [NotFP64bit, HasStdEnc] in {
   defm : MovzPats0<CPURegs, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>;
   defm : MovzPats1<CPURegs, AFGR64, MOVZ_I_D32, XOR>;
   defm : MovnPats<CPURegs, AFGR64, MOVN_I_D32, XOR>;
 }
-let Predicates = [IsFP64bit, HasStandardEncoding] in {
+let Predicates = [IsFP64bit, HasStdEnc] in {
   defm : MovzPats0<CPURegs, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>;
   defm : MovzPats0<CPU64Regs, FGR64, MOVZ_I_D64, SLT64, SLTu64, SLTi64,
                    SLTiu64>;
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 65f2f3c445..610575d60d 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -15,14 +15,14 @@
 
 #include "Mips.h"
 #include "MipsTargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
 
 using namespace llvm;
 
@@ -112,7 +112,7 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
     if (I->hasDelaySlot()) {
       ++FilledSlots;
       Changed = true;
-
+      InstrIter InstrWithSlot = I;
       InstrIter D;
 
       // Delay slot filling is disabled at -O0.
@@ -127,9 +127,9 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB) {
       // The instruction after it will be visited in the next iteration.
       LastFiller = ++I;
 
-      // Set InsideBundle bit so that the machine verifier doesn't expect this
-      // instruction to be a terminator.
-      LastFiller->setIsInsideBundle();
+      // Bundle the delay slot filler to InstrWithSlot so that the machine
+      // verifier doesn't expect this instruction to be a terminator.
+      MIBundleBuilder(MBB, InstrWithSlot, llvm::next(LastFiller));
      }
   return Changed;
 
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index 2cad2a6264..e3cdab427d 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -12,20 +12,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "MipsFrameLowering.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
 #include "MipsAnalyzeImmediate.h"
 #include "MipsInstrInfo.h"
 #include "MipsMachineFunction.h"
 #include "MipsTargetMachine.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Function.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index c925ab437f..0a8c0b7994 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -13,28 +13,28 @@
 
 #define DEBUG_TYPE "mips-isel"
 #include "Mips.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
 #include "MipsAnalyzeImmediate.h"
 #include "MipsMachineFunction.h"
 #include "MipsRegisterInfo.h"
 #include "MipsSubtarget.h"
 #include "MipsTargetMachine.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Type.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 61e255ecdb..89b9cd150e 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -14,18 +14,14 @@
 
 #define DEBUG_TYPE "mips-lower"
 #include "MipsISelLowering.h"
+#include "InstPrinter/MipsInstPrinter.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
 #include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
 #include "MipsTargetMachine.h"
 #include "MipsTargetObjectFile.h"
-#include "MipsSubtarget.h"
-#include "InstPrinter/MipsInstPrinter.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/CallingConv.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CallingConv.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -33,6 +29,10 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -169,7 +169,6 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MipsISD::BuildPairF64:      return "MipsISD::BuildPairF64";
   case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
   case MipsISD::Wrapper:           return "MipsISD::Wrapper";
-  case MipsISD::DynAlloc:          return "MipsISD::DynAlloc";
   case MipsISD::Sync:              return "MipsISD::Sync";
   case MipsISD::Ext:               return "MipsISD::Ext";
   case MipsISD::Ins:               return "MipsISD::Ins";
@@ -470,7 +469,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
   maxStoresPerMemcpy = 16;
 }
 
-bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+bool
+MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
   MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
 
   if (Subtarget->inMips16Mode())
@@ -479,6 +479,8 @@ bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
   switch (SVT) {
   case MVT::i64:
   case MVT::i32:
+    if (Fast)
+      *Fast = true;
     return true;
   default:
     return false;
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index b57dff2d39..08165a036c 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -149,7 +149,7 @@ namespace llvm {
 
     virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
 
-    virtual bool allowsUnalignedMemoryAccesses (EVT VT) const;
+    virtual bool allowsUnalignedMemoryAccesses (EVT VT, bool *Fast) const;
 
     virtual void LowerOperationWrapper(SDNode *N,
                                        SmallVectorImpl<SDValue> &Results,
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 3d6c1ca43f..5daed320d2 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -121,15 +121,15 @@ class FPIdxStore<bits<6> funct, string opstr, RegisterClass DRC,
 multiclass FFR1_W_M<bits<6> funct, string opstr> {
   def _S   : FFR1<funct, 16, opstr, "w.s", FGR32, FGR32>;
   def _D32 : FFR1<funct, 17, opstr, "w.d", FGR32, AFGR64>,
-             Requires<[NotFP64bit, HasStandardEncoding]>;
+             Requires<[NotFP64bit, HasStdEnc]>;
   def _D64 : FFR1<funct, 17, opstr, "w.d", FGR32, FGR64>,
-             Requires<[IsFP64bit, HasStandardEncoding]> {
+             Requires<[IsFP64bit, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
   }
 }
 
 // Instructions that convert an FP value to 64-bit fixed point.
-let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64" in
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in
 multiclass FFR1_L_M<bits<6> funct, string opstr> {
   def _S   : FFR1<funct, 16, opstr, "l.s", FGR64, FGR32>;
   def _D64 : FFR1<funct, 17, opstr, "l.d", FGR64, FGR64>;
@@ -139,9 +139,9 @@ multiclass FFR1_L_M<bits<6> funct, string opstr> {
 multiclass FFR1P_M<bits<6> funct, string opstr, SDNode OpNode> {
   def _S   : FFR1P<funct, 16, opstr, "s", FGR32, FGR32, OpNode>;
   def _D32 : FFR1P<funct, 17, opstr, "d", AFGR64, AFGR64, OpNode>,
-             Requires<[NotFP64bit, HasStandardEncoding]>;
+             Requires<[NotFP64bit, HasStdEnc]>;
   def _D64 : FFR1P<funct, 17, opstr, "d", FGR64, FGR64, OpNode>,
-             Requires<[IsFP64bit, HasStandardEncoding]> {
+             Requires<[IsFP64bit, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
   }
 }
@@ -150,9 +150,9 @@ multiclass FFR2P_M<bits<6> funct, string opstr, SDNode OpNode, bit isComm = 0> {
   let isCommutable = isComm in {
   def _S   : FFR2P<funct, 16, opstr, "s", FGR32, OpNode>;
   def _D32 : FFR2P<funct, 17, opstr, "d", AFGR64, OpNode>,
-             Requires<[NotFP64bit, HasStandardEncoding]>;
+             Requires<[NotFP64bit, HasStdEnc]>;
   def _D64 : FFR2P<funct, 17, opstr, "d", FGR64, OpNode>,
-             Requires<[IsFP64bit, HasStandardEncoding]> {
+             Requires<[IsFP64bit, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
   }
 }
@@ -189,13 +189,13 @@ def CVT_S_W : FFR1<0x20, 20, "cvt", "s.w", FGR32, FGR32>, NeverHasSideEffects;
 def CVT_L_S : FFR1<0x25, 16, "cvt", "l.s", FGR64, FGR32>, NeverHasSideEffects;
 def CVT_L_D64: FFR1<0x25, 17, "cvt", "l.d", FGR64, FGR64>, NeverHasSideEffects;
 
-let Predicates = [NotFP64bit, HasStandardEncoding], neverHasSideEffects = 1 in {
+let Predicates = [NotFP64bit, HasStdEnc], neverHasSideEffects = 1 in {
   def CVT_S_D32 : FFR1<0x20, 17, "cvt", "s.d", FGR32, AFGR64>;
   def CVT_D32_W : FFR1<0x21, 20, "cvt", "d.w", AFGR64, FGR32>;
   def CVT_D32_S : FFR1<0x21, 16, "cvt", "d.s", AFGR64, FGR32>;
 }
 
-let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64",
+let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64",
     neverHasSideEffects = 1 in {
  def CVT_S_D64 : FFR1<0x20, 17, "cvt", "s.d", FGR32, FGR64>;
  def CVT_S_L   : FFR1<0x20, 21, "cvt", "s.l", FGR32, FGR64>;
@@ -204,7 +204,7 @@ let Predicates = [IsFP64bit, HasStandardEncoding], DecoderNamespace = "Mips64",
  def CVT_D64_L : FFR1<0x21, 21, "cvt", "d.l", FGR64, FGR64>;
 }
 
-let Predicates = [NoNaNsFPMath, HasStandardEncoding] in {
+let Predicates = [NoNaNsFPMath, HasStdEnc] in {
   defm FABS    : FFR1P_M<0x5, "abs",  fabs>;
   defm FNEG    : FFR1P_M<0x7, "neg",  fneg>;
 }
@@ -247,14 +247,14 @@ def DMTC1 : FFRGPR<0x05, (outs FGR64:$fs), (ins CPU64Regs:$rt),
 
 def FMOV_S   : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>;
 def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>,
-               Requires<[NotFP64bit, HasStandardEncoding]>;
+               Requires<[NotFP64bit, HasStdEnc]>;
 def FMOV_D64 : FFR1<0x6, 17, "mov", "d", FGR64, FGR64>,
-               Requires<[IsFP64bit, HasStandardEncoding]> {
+               Requires<[IsFP64bit, HasStdEnc]> {
   let DecoderNamespace = "Mips64";
 }
 
 /// Floating Point Memory Instructions
-let Predicates = [IsN64, HasStandardEncoding], DecoderNamespace = "Mips64" in {
+let Predicates = [IsN64, HasStdEnc], DecoderNamespace = "Mips64" in {
   def LWC1_P8   : FPLoad<0x31, "lwc1", FGR32, mem64>;
   def SWC1_P8   : FPStore<0x39, "swc1", FGR32, mem64>;
   def LDC164_P8 : FPLoad<0x35, "ldc1", FGR64, mem64> {
@@ -265,18 +265,18 @@ let Predicates = [IsN64, HasStandardEncoding], DecoderNamespace = "Mips64" in {
   }
 }
 
-let Predicates = [NotN64, HasStandardEncoding] in {
+let Predicates = [NotN64, HasStdEnc] in {
   def LWC1   : FPLoad<0x31, "lwc1", FGR32, mem>;
   def SWC1   : FPStore<0x39, "swc1", FGR32, mem>;
 }
 
-let Predicates = [NotN64, HasMips64, HasStandardEncoding],
+let Predicates = [NotN64, HasMips64, HasStdEnc],
   DecoderNamespace = "Mips64" in {
   def LDC164 : FPLoad<0x35, "ldc1", FGR64, mem>;
   def SDC164 : FPStore<0x3d, "sdc1", FGR64, mem>;
 }
 
-let Predicates = [NotN64, NotMips64, HasStandardEncoding] in {
+let Predicates = [NotN64, NotMips64, HasStdEnc] in {
   def LDC1   : FPLoad<0x35, "ldc1", AFGR64, mem>;
   def SDC1   : FPStore<0x3d, "sdc1", AFGR64, mem>;
 }
@@ -307,12 +307,12 @@ let Predicates = [IsN64, IsNotNaCl/*@LOCALMOD*/], isCodeGenOnly=1 in {
 }
 
 // Load/store doubleword indexed unaligned.
-let Predicates = [NotMips64, HasStandardEncoding] in {
+let Predicates = [NotMips64, HasStdEnc] in {
   def LUXC1 : FPIdxLoad<0x5, "luxc1", AFGR64, CPURegs>;
   def SUXC1 : FPIdxStore<0xd, "suxc1", AFGR64, CPURegs>;
 }
 
-let Predicates = [HasMips64, HasStandardEncoding],
+let Predicates = [HasMips64, HasStdEnc],
   DecoderNamespace="Mips64" in {
   def LUXC164 : FPIdxLoad<0x5, "luxc1", FGR64, CPURegs>;
   def SUXC164 : FPIdxStore<0xd, "suxc1", FGR64, CPURegs>;
@@ -324,32 +324,32 @@ defm FDIV : FFR2P_M<0x03, "div", fdiv>;
 defm FMUL : FFR2P_M<0x02, "mul", fmul, 1>;
 defm FSUB : FFR2P_M<0x01, "sub", fsub>;
 
-let Predicates = [HasMips32r2, HasStandardEncoding] in {
+let Predicates = [HasMips32r2, HasStdEnc] in {
   def MADD_S : FMADDSUB<0x4, 0, "madd", "s", fadd, FGR32>;
   def MSUB_S : FMADDSUB<0x5, 0, "msub", "s", fsub, FGR32>;
 }
 
-let Predicates = [HasMips32r2, NoNaNsFPMath, HasStandardEncoding] in {
+let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in {
   def NMADD_S : FNMADDSUB<0x6, 0, "nmadd", "s", fadd, FGR32>;
   def NMSUB_S : FNMADDSUB<0x7, 0, "nmsub", "s", fsub, FGR32>;
 }
 
-let Predicates = [HasMips32r2, NotFP64bit, HasStandardEncoding] in {
+let Predicates = [HasMips32r2, NotFP64bit, HasStdEnc] in {
   def MADD_D32 : FMADDSUB<0x4, 1, "madd", "d", fadd, AFGR64>;
   def MSUB_D32 : FMADDSUB<0x5, 1, "msub", "d", fsub, AFGR64>;
 }
 
-let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStandardEncoding] in {
+let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in {
   def NMADD_D32 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, AFGR64>;
   def NMSUB_D32 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, AFGR64>;
 }
 
-let Predicates = [HasMips32r2, IsFP64bit, HasStandardEncoding], isCodeGenOnly=1 in {
+let Predicates = [HasMips32r2, IsFP64bit, HasStdEnc], isCodeGenOnly=1 in {
   def MADD_D64 : FMADDSUB<0x4, 1, "madd", "d", fadd, FGR64>;
   def MSUB_D64 : FMADDSUB<0x5, 1, "msub", "d", fsub, FGR64>;
 }
 
-let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStandardEncoding],
+let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc],
     isCodeGenOnly=1 in {
   def NMADD_D64 : FNMADDSUB<0x6, 1, "nmadd", "d", fadd, FGR64>;
   def NMSUB_D64 : FNMADDSUB<0x7, 1, "nmsub", "d", fsub, FGR64>;
@@ -408,9 +408,9 @@ class FCMP<bits<5> fmt, RegisterClass RC, string typestr> :
 let Defs=[FCR31] in {
   def FCMP_S32 : FCMP<0x10, FGR32, "s">;
   def FCMP_D32 : FCMP<0x11, AFGR64, "d">,
-      Requires<[NotFP64bit, HasStandardEncoding]>;
+      Requires<[NotFP64bit, HasStdEnc]>;
   def FCMP_D64 : FCMP<0x11, FGR64, "d">,
-      Requires<[IsFP64bit, HasStandardEncoding]> {
+      Requires<[IsFP64bit, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
   }
 }
@@ -445,7 +445,7 @@ def : MipsPat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>;
 def : MipsPat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>;
 def : MipsPat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>;
 
-let Predicates = [NotFP64bit, HasStandardEncoding] in {
+let Predicates = [NotFP64bit, HasStdEnc] in {
   def : MipsPat<(f64 (sint_to_fp CPURegs:$src)),
                 (CVT_D32_W (MTC1 CPURegs:$src))>;
   def : MipsPat<(i32 (fp_to_sint AFGR64:$src)),
@@ -454,7 +454,7 @@ let Predicates = [NotFP64bit, HasStandardEncoding] in {
   def : MipsPat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>;
 }
 
-let Predicates = [IsFP64bit, HasStandardEncoding] in {
+let Predicates = [IsFP64bit, HasStdEnc] in {
   def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>;
   def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>;
 
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 1ecbdc2474..c3c12c1fea 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -76,7 +76,7 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
 class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
              InstrItinClass itin, Format f>:
   MipsInst<outs, ins, asmstr, pattern, itin, f> {
-  let Predicates = [HasStandardEncoding];
+  let Predicates = [HasStdEnc];
 }
 
 // Mips Pseudo Instructions Format
@@ -89,7 +89,7 @@ class MipsPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
 // Mips32/64 Pseudo Instruction Format
 class PseudoSE<dag outs, dag ins, string asmstr, list<dag> pattern>:
   MipsPseudo<outs, ins, asmstr, pattern> {
-  let Predicates = [HasStandardEncoding];
+  let Predicates = [HasStdEnc];
 }
 
 // Pseudo-instructions for alternate assembly syntax (never used by codegen).
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index ca80d43f36..76644c1584 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -11,16 +11,16 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MipsAnalyzeImmediate.h"
 #include "MipsInstrInfo.h"
-#include "MipsTargetMachine.h"
-#include "MipsMachineFunction.h"
 #include "InstPrinter/MipsInstPrinter.h"
+#include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
 
 #define GET_INSTRINFO_CTOR
 #include "MipsGenInstrInfo.inc"
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 874eb136c0..8f8ab86e70 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -33,8 +33,6 @@ def SDT_MipsDivRem       : SDTypeProfile<0, 2,
 
 def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
 
-def SDT_MipsDynAlloc    : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>,
-                                               SDTCisSameAs<0, 1>]>;
 def SDT_Sync             : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
 
 def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
@@ -111,10 +109,6 @@ def MipsDivRemU   : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
 
 def MipsWrapper    : SDNode<"MipsISD::Wrapper", SDTIntBinOp>;
 
-// Pointer to dynamically allocated stack area.
-def MipsDynAlloc  : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
-                           [SDNPHasChain, SDNPInGlue]>;
-
 def MipsSync : SDNode<"MipsISD::Sync", SDT_Sync, [SDNPHasChain,SDNPSideEffect]>;
 
 def MipsExt :  SDNode<"MipsISD::Ext", SDT_Ext>;
@@ -172,13 +166,13 @@ def RelocPIC    :     Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
                       AssemblerPredicate<"FeatureMips32">;
 def NoNaNsFPMath :    Predicate<"TM.Options.NoNaNsFPMath">,
                       AssemblerPredicate<"FeatureMips32">;
-def HasStandardEncoding : Predicate<"Subtarget.hasStandardEncoding()">,
-                          AssemblerPredicate<"!FeatureMips16">;
+def HasStdEnc :       Predicate<"Subtarget.hasStandardEncoding()">,
+                      AssemblerPredicate<"!FeatureMips16">;
 def IsNaCl       :    Predicate<"Subtarget.isTargetNaCl()">;
 def IsNotNaCl    :    Predicate<"!Subtarget.isTargetNaCl()">;
 
 class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
-  let Predicates = [HasStandardEncoding];
+  let Predicates = [HasStdEnc];
 }
 
 class IsBranch {
@@ -463,9 +457,9 @@ class StoreM<bits<6> op, string instr_asm, PatFrag OpNode, RegisterClass RC,
 multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
                    bit Pseudo = 0> {
   def #NAME# : LoadM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
-               Requires<[NotN64, HasStandardEncoding]>;
+               Requires<[NotN64, HasStdEnc]>;
   def _P8    : LoadM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
-               Requires<[IsN64, HasStandardEncoding]> {
+               Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
     let isCodeGenOnly = 1;
   }
@@ -475,9 +469,9 @@ multiclass LoadM32<bits<6> op, string instr_asm, PatFrag OpNode,
 multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
                    bit Pseudo = 0> {
   def #NAME# : LoadM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
-               Requires<[NotN64, HasStandardEncoding]>;
+               Requires<[NotN64, HasStdEnc]>;
   def _P8    : LoadM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
-               Requires<[IsN64, HasStandardEncoding]> {
+               Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
     let isCodeGenOnly = 1;
   }
@@ -487,9 +481,9 @@ multiclass LoadM64<bits<6> op, string instr_asm, PatFrag OpNode,
 multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
                     bit Pseudo = 0> {
   def #NAME# : StoreM<op, instr_asm, OpNode, CPURegs, mem, Pseudo>,
-               Requires<[NotN64, HasStandardEncoding]>;
+               Requires<[NotN64, HasStdEnc]>;
   def _P8    : StoreM<op, instr_asm, OpNode, CPURegs, mem64, Pseudo>,
-               Requires<[IsN64, HasStandardEncoding]> {
+               Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
     let isCodeGenOnly = 1;
   }
@@ -499,9 +493,9 @@ multiclass StoreM32<bits<6> op, string instr_asm, PatFrag OpNode,
 multiclass StoreM64<bits<6> op, string instr_asm, PatFrag OpNode,
                     bit Pseudo = 0> {
   def #NAME# : StoreM<op, instr_asm, OpNode, CPU64Regs, mem, Pseudo>,
-               Requires<[NotN64, HasStandardEncoding]>;
+               Requires<[NotN64, HasStdEnc]>;
   def _P8    : StoreM<op, instr_asm, OpNode, CPU64Regs, mem64, Pseudo>,
-               Requires<[IsN64, HasStandardEncoding]> {
+               Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
     let isCodeGenOnly = 1;
   }
@@ -526,9 +520,9 @@ class StoreLeftRight<bits<6> op, string instr_asm, SDNode OpNode,
 // 32-bit load left/right.
 multiclass LoadLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> {
   def #NAME# : LoadLeftRight<op, instr_asm, OpNode, CPURegs, mem>,
-               Requires<[NotN64, HasStandardEncoding]>;
+               Requires<[NotN64, HasStdEnc]>;
   def _P8    : LoadLeftRight<op, instr_asm, OpNode, CPURegs, mem64>,
-               Requires<[IsN64, HasStandardEncoding]> {
+               Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
     let isCodeGenOnly = 1;
   }
@@ -537,9 +531,9 @@ multiclass LoadLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> {
 // 64-bit load left/right.
 multiclass LoadLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> {
   def #NAME# : LoadLeftRight<op, instr_asm, OpNode, CPU64Regs, mem>,
-               Requires<[NotN64, HasStandardEncoding]>;
+               Requires<[NotN64, HasStdEnc]>;
   def _P8    : LoadLeftRight<op, instr_asm, OpNode, CPU64Regs, mem64>,
-               Requires<[IsN64, HasStandardEncoding]> {
+               Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
     let isCodeGenOnly = 1;
   }
@@ -548,9 +542,9 @@ multiclass LoadLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> {
 // 32-bit store left/right.
 multiclass StoreLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> {
   def #NAME# : StoreLeftRight<op, instr_asm, OpNode, CPURegs, mem>,
-               Requires<[NotN64, HasStandardEncoding]>;
+               Requires<[NotN64, HasStdEnc]>;
   def _P8    : StoreLeftRight<op, instr_asm, OpNode, CPURegs, mem64>,
-               Requires<[IsN64, HasStandardEncoding]> {
+               Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
     let isCodeGenOnly = 1;
   }
@@ -559,9 +553,9 @@ multiclass StoreLeftRightM32<bits<6> op, string instr_asm, SDNode OpNode> {
 // 64-bit store left/right.
 multiclass StoreLeftRightM64<bits<6> op, string instr_asm, SDNode OpNode> {
   def #NAME# : StoreLeftRight<op, instr_asm, OpNode, CPU64Regs, mem>,
-               Requires<[NotN64, HasStandardEncoding]>;
+               Requires<[NotN64, HasStdEnc]>;
   def _P8    : StoreLeftRight<op, instr_asm, OpNode, CPU64Regs, mem64>,
-               Requires<[IsN64, HasStandardEncoding]> {
+               Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
     let isCodeGenOnly = 1;
   }
@@ -629,7 +623,7 @@ class UncondBranch<bits<6> op, string instr_asm>:
   let isTerminator = 1;
   let isBarrier = 1;
   let hasDelaySlot = 1;
-  let Predicates = [RelocPIC, HasStandardEncoding];
+  let Predicates = [RelocPIC, HasStdEnc];
   let Defs = [AT];
 }
 
@@ -743,7 +737,7 @@ class CountLeading0<bits<6> func, string instr_asm, RegisterClass RC>:
   FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
      !strconcat(instr_asm, "\t$rd, $rs"),
      [(set RC:$rd, (ctlz RC:$rs))], IIAlu>,
-     Requires<[HasBitCount, HasStandardEncoding]> {
+     Requires<[HasBitCount, HasStdEnc]> {
   let shamt = 0;
   let rt = rd;
 }
@@ -752,7 +746,7 @@ class CountLeading1<bits<6> func, string instr_asm, RegisterClass RC>:
   FR<0x1c, func, (outs RC:$rd), (ins RC:$rs),
      !strconcat(instr_asm, "\t$rd, $rs"),
      [(set RC:$rd, (ctlz (not RC:$rs)))], IIAlu>,
-     Requires<[HasBitCount, HasStandardEncoding]> {
+     Requires<[HasBitCount, HasStdEnc]> {
   let shamt = 0;
   let rt = rd;
 }
@@ -765,7 +759,7 @@ class SignExtInReg<bits<5> sa, string instr_asm, ValueType vt,
      [(set RC:$rd, (sext_inreg RC:$rt, vt))], NoItinerary> {
   let rs = 0;
   let shamt = sa;
-  let Predicates = [HasSEInReg, HasStandardEncoding];
+  let Predicates = [HasSEInReg, HasStdEnc];
 }
 
 // Subword Swap
@@ -774,7 +768,7 @@ class SubwordSwap<bits<6> func, bits<5> sa, string instr_asm, RegisterClass RC>:
      !strconcat(instr_asm, "\t$rd, $rt"), [], NoItinerary> {
   let rs = 0;
   let shamt = sa;
-  let Predicates = [HasSwap, HasStandardEncoding];
+  let Predicates = [HasSwap, HasStdEnc];
   let neverHasSideEffects = 1;
 }
 
@@ -795,7 +789,7 @@ class ExtBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
   bits<5> sz;
   let rd = sz;
   let shamt = pos;
-  let Predicates = [HasMips32r2, HasStandardEncoding];
+  let Predicates = [HasMips32r2, HasStdEnc];
 }
 
 class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
@@ -808,7 +802,7 @@ class InsBase<bits<6> _funct, string instr_asm, RegisterClass RC>:
   bits<5> sz;
   let rd = sz;
   let shamt = pos;
-  let Predicates = [HasMips32r2, HasStandardEncoding];
+  let Predicates = [HasMips32r2, HasStdEnc];
   let Constraints = "$src = $rt";
 }
 
@@ -821,9 +815,9 @@ class Atomic2Ops<PatFrag Op, string Opstr, RegisterClass DRC,
 
 multiclass Atomic2Ops32<PatFrag Op, string Opstr> {
   def #NAME# : Atomic2Ops<Op, Opstr, CPURegs, CPURegs>,
-                          Requires<[NotN64, HasStandardEncoding]>;
+                          Requires<[NotN64, HasStdEnc]>;
   def _P8    : Atomic2Ops<Op, Opstr, CPURegs, CPU64Regs>,
-                          Requires<[IsN64, HasStandardEncoding]> {
+                          Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
   }
 }
@@ -837,9 +831,9 @@ class AtomicCmpSwap<PatFrag Op, string Width, RegisterClass DRC,
 
 multiclass AtomicCmpSwap32<PatFrag Op, string Width>  {
   def #NAME# : AtomicCmpSwap<Op, Width, CPURegs, CPURegs>,
-                             Requires<[NotN64, HasStandardEncoding]>;
+                             Requires<[NotN64, HasStdEnc]>;
   def _P8    : AtomicCmpSwap<Op, Width, CPURegs, CPU64Regs>,
-                             Requires<[IsN64, HasStandardEncoding]> {
+                             Requires<[IsN64, HasStdEnc]> {
     let DecoderNamespace = "Mips64";
   }
 }
@@ -997,7 +991,7 @@ def SRLV    : shift_rotate_reg<0x06, 0x00, "srlv", srl, CPURegs>;
 def SRAV    : shift_rotate_reg<0x07, 0x00, "srav", sra, CPURegs>;
 
 // Rotate Instructions
-let Predicates = [HasMips32r2, HasStandardEncoding] in {
+let Predicates = [HasMips32r2, HasStdEnc] in {
     def ROTR    : shift_rotate_imm32<0x02, 0x01, "rotr", rotr>;
     def ROTRV   : shift_rotate_reg<0x06, 0x01, "rotrv", rotr, CPURegs>;
 }
@@ -1032,22 +1026,22 @@ def SYNC : InstSE<(outs), (ins i32imm:$stype), "sync $stype",
 
 /// Load-linked, Store-conditional
 def LL    : LLBase<0x30, "ll", CPURegs, mem>,
-            Requires<[NotN64, HasStandardEncoding]>;
+            Requires<[NotN64, HasStdEnc]>;
 def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>,
-            Requires<[IsN64, HasStandardEncoding]> {
+            Requires<[IsN64, HasStdEnc]> {
   let DecoderNamespace = "Mips64";
 }
 
 def SC    : SCBase<0x38, "sc", CPURegs, mem>,
-            Requires<[NotN64, HasStandardEncoding]>;
+            Requires<[NotN64, HasStdEnc]>;
 def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>,
-            Requires<[IsN64, HasStandardEncoding]> {
+            Requires<[IsN64, HasStdEnc]> {
   let DecoderNamespace = "Mips64";
 }
 
 /// Jump and Branch Instructions
 def J       : JumpFJ<0x02, jmptarget, "j", br, bb>,
-              Requires<[RelocStatic, HasStandardEncoding]>, IsBranch;
+              Requires<[RelocStatic, HasStdEnc]>, IsBranch;
 def JR      : IndirectBranch<CPURegs>;
 def B       : UncondBranch<0x04, "b">;
 def BEQ     : CBranch<0x04, "beq", seteq, CPURegs>;
@@ -1102,12 +1096,6 @@ let addr=0 in
 // can be matched. It's similar to Sparc LEA_ADDRi
 def LEA_ADDiu : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>;
 
-// DynAlloc node points to dynamically allocated stack space.
-// $sp is added to the list of implicitly used registers to prevent dead code
-// elimination from removing instructions that modify $sp.
-let Uses = [SP] in
-def DynAlloc : EffectiveAddress<0x09,"addiu\t$rt, $addr", CPURegs, mem_ea>;
-
 // MADD*/MSUB*
 def MADD  : MArithR<0, "madd", MipsMAdd, 1>;
 def MADDU : MArithR<1, "maddu", MipsMAddu, 1>;
@@ -1117,7 +1105,7 @@ def MSUBU : MArithR<5, "msubu", MipsMSubu>;
 // MUL is a assembly macro in the current used ISAs. In recent ISA's
 // it is a real instruction.
 def MUL   : ArithLogicR<0x1c, 0x02, "mul", mul, IIImul, CPURegs, 1>,
-            Requires<[HasMips32, HasStandardEncoding]>;
+            Requires<[HasStdEnc]>;
 
 def RDHWR : ReadHardware<CPURegs, HWRegs>;
 
@@ -1230,22 +1218,22 @@ def : MipsPat<(not CPURegs:$in),
               (NOR CPURegs:$in, ZERO)>;
 
 // extended loads
-let Predicates = [NotN64, HasStandardEncoding] in {
+let Predicates = [NotN64, HasStdEnc] in {
   def : MipsPat<(i32 (extloadi1  addr:$src)), (LBu addr:$src)>;
   def : MipsPat<(i32 (extloadi8  addr:$src)), (LBu addr:$src)>;
   def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>;
 }
-let Predicates = [IsN64, HasStandardEncoding] in {
+let Predicates = [IsN64, HasStdEnc] in {
   def : MipsPat<(i32 (extloadi1  addr:$src)), (LBu_P8 addr:$src)>;
   def : MipsPat<(i32 (extloadi8  addr:$src)), (LBu_P8 addr:$src)>;
   def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu_P8 addr:$src)>;
 }
 
 // peepholes
-let Predicates = [NotN64, HasStandardEncoding] in {
+let Predicates = [NotN64, HasStdEnc] in {
   def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
 }
-let Predicates = [IsN64, HasStandardEncoding] in {
+let Predicates = [IsN64, HasStdEnc] in {
   def : MipsPat<(store (i32 0), addr:$dst), (SW_P8 ZERO, addr:$dst)>;
 }
 
@@ -1322,9 +1310,6 @@ defm : SetgtPats<CPURegs, SLT, SLTu>;
 defm : SetgePats<CPURegs, SLT, SLTu>;
 defm : SetgeImmPats<CPURegs, SLTi, SLTiu>;
 
-// select MipsDynAlloc
-def : MipsPat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
-
 // bswap pattern
 def : MipsPat<(bswap CPURegs:$rt), (ROTR (WSBH CPURegs:$rt), 16)>;
 
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
index 052046a8a4..5fd600ebc9 100644
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ b/lib/Target/Mips/MipsJITInfo.cpp
@@ -16,12 +16,12 @@
 #include "MipsInstrInfo.h"
 #include "MipsRelocations.h"
 #include "MipsSubtarget.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cstdlib>
 using namespace llvm;
 
@@ -222,10 +222,17 @@ void *MipsJITInfo::emitFunctionStub(const Function *F, void *Fn,
   // addiu t9, t9, %lo(EmittedAddr)
   // jalr t8, t9
   // nop
-  JCE.emitWordLE(0xf << 26 | 25 << 16 | Hi);
-  JCE.emitWordLE(9 << 26 | 25 << 21 | 25 << 16 | Lo);
-  JCE.emitWordLE(25 << 21 | 24 << 11 | 9);
-  JCE.emitWordLE(0);
+  if (IsLittleEndian) {
+    JCE.emitWordLE(0xf << 26 | 25 << 16 | Hi);
+    JCE.emitWordLE(9 << 26 | 25 << 21 | 25 << 16 | Lo);
+    JCE.emitWordLE(25 << 21 | 24 << 11 | 9);
+    JCE.emitWordLE(0);
+  } else {
+    JCE.emitWordBE(0xf << 26 | 25 << 16 | Hi);
+    JCE.emitWordBE(9 << 26 | 25 << 21 | 25 << 16 | Lo);
+    JCE.emitWordBE(25 << 21 | 24 << 11 | 9);
+    JCE.emitWordBE(0);
+  }
 
   sys::Memory::InvalidateInstructionCache(Addr, 16);
   if (!sys::Memory::setRangeExecutable(Addr, 16))
diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h
index 637a318660..ecda3101a0 100644
--- a/lib/Target/Mips/MipsJITInfo.h
+++ b/lib/Target/Mips/MipsJITInfo.h
@@ -26,10 +26,11 @@ class MipsTargetMachine;
 class MipsJITInfo : public TargetJITInfo {
 
   bool IsPIC;
+  bool IsLittleEndian;
 
   public:
     explicit MipsJITInfo() :
-      IsPIC(false) {}
+      IsPIC(false), IsLittleEndian(true) {}
 
     /// replaceMachineCodeForFunction - Make it so that calling the function
     /// whose machine code is at OLD turns into a call to NEW, perhaps by
@@ -58,8 +59,10 @@ class MipsJITInfo : public TargetJITInfo {
                           unsigned NumRelocs, unsigned char *GOTBase);
 
     /// Initialize - Initialize internal stage for the function being JITted.
-    void Initialize(const MachineFunction &MF, bool isPIC) {
+    void Initialize(const MachineFunction &MF, bool isPIC,
+                    bool isLittleEndian) {
       IsPIC = isPIC;
+      IsLittleEndian = isLittleEndian;
     }
 
 };
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 6cd887b0eb..1d53a1508e 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -19,8 +19,8 @@
 #define DEBUG_TYPE "mips-long-branch"
 
 #include "Mips.h"
-#include "MipsTargetMachine.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
+#include "MipsTargetMachine.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -284,9 +284,10 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
         .addReg(Mips::SP).addImm(-8);
       BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SW)).addReg(Mips::RA)
         .addReg(Mips::SP).addImm(0);
-      BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB);
-      BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi)
-        ->setIsInsideBundle();
+
+      MIBundleBuilder(*LongBrMBB, Pos)
+        .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
+        .append(BuildMI(*MF, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi));
 
       Pos = BalTgtMBB->begin();
 
@@ -296,9 +297,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
         .addReg(Mips::RA).addReg(Mips::AT);
       BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA)
         .addReg(Mips::SP).addImm(0);
-      BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR)).addReg(Mips::AT);
-      BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP)
-        .addReg(Mips::SP).addImm(8)->setIsInsideBundle();
+
+      MIBundleBuilder(*BalTgtMBB, Pos)
+        .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT))
+        .append(BuildMI(*MF, DL, TII->get(Mips::ADDiu), Mips::SP)
+                .addReg(Mips::SP).addImm(8));
     } else {
       // $longbr:
       //  daddiu $sp, $sp, -16
@@ -336,9 +339,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
         .addReg(Mips::AT_64).addImm(16);
       BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64)
         .addReg(Mips::AT_64).addImm(Hi);
-      BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB);
-      BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
-        .addReg(Mips::AT_64).addImm(16)->setIsInsideBundle();
+
+      MIBundleBuilder(*LongBrMBB, Pos)
+        .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
+        .append(BuildMI(*MF, DL, TII->get(Mips::DSLL), Mips::AT_64)
+                .addReg(Mips::AT_64).addImm(16));
 
       Pos = BalTgtMBB->begin();
 
@@ -348,9 +353,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
         .addReg(Mips::RA_64).addReg(Mips::AT_64);
       BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64)
         .addReg(Mips::SP_64).addImm(0);
-      BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64);
-      BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::SP_64)
-        .addReg(Mips::SP_64).addImm(16)->setIsInsideBundle();
+
+      MIBundleBuilder(*BalTgtMBB, Pos)
+        .append(BuildMI(*MF, DL, TII->get(Mips::JR64)).addReg(Mips::AT_64))
+        .append(BuildMI(*MF, DL, TII->get(Mips::DADDiu), Mips::SP_64)
+                .addReg(Mips::SP_64).addImm(16));
     }
 
     assert(BalTgtMBBSize == BalTgtMBB->size());
@@ -363,8 +370,9 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
     //
     Pos = LongBrMBB->begin();
     LongBrMBB->addSuccessor(TgtMBB);
-    BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::J)).addMBB(TgtMBB);
-    BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::NOP))->setIsInsideBundle();
+    MIBundleBuilder(*LongBrMBB, Pos)
+      .append(BuildMI(*MF, DL, TII->get(Mips::J)).addMBB(TgtMBB))
+      .append(BuildMI(*MF, DL, TII->get(Mips::NOP)));
 
     assert(LongBrMBB->size() == LongBranchSeqSize);
   }
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index f783af0e9a..e0d884dfda 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -12,9 +12,9 @@
 //
 //===----------------------------------------------------------------------===//
 #include "MipsMCInstLower.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
 #include "MipsAsmPrinter.h"
 #include "MipsInstrInfo.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index 5ff19aba02..11eef6591c 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -8,12 +8,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "MipsMachineFunction.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
 #include "MipsInstrInfo.h"
 #include "MipsSubtarget.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Function.h"
 #include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index bb45f92f18..eb6e1cff49 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -15,8 +15,8 @@
 #define MIPS_MACHINE_FUNCTION_INFO_H
 
 #include "MipsSubtarget.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include <utility>
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 35f1a5b6fc..36d8971579 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -17,25 +17,25 @@
 #include "Mips.h"
 #include "MipsAnalyzeImmediate.h"
 #include "MipsInstrInfo.h"
-#include "MipsSubtarget.h"
 #include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Constants.h"
 #include "llvm/DebugInfo.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Type.h"
 
 #define GET_REGINFO_TARGET_DESC
 #include "MipsGenRegisterInfo.inc"
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 391c19e07e..f07a10c3dd 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -19,52 +19,43 @@ def sub_hi     : SubRegIndex;
 }
 
 // We have banks of 32 registers each.
-class MipsReg<string n> : Register<n> {
-  field bits<5> Num;
+class MipsReg<bits<16> Enc, string n> : Register<n> {
+  let HWEncoding = Enc;
   let Namespace = "Mips";
 }
 
-class MipsRegWithSubRegs<string n, list<Register> subregs>
+class MipsRegWithSubRegs<bits<16> Enc, string n, list<Register> subregs>
   : RegisterWithSubRegs<n, subregs> {
-  field bits<5> Num;
+  let HWEncoding = Enc;
   let Namespace = "Mips";
 }
 
 // Mips CPU Registers
-class MipsGPRReg<bits<5> num, string n> : MipsReg<n> {
-  let Num = num;
-}
+class MipsGPRReg<bits<16> Enc, string n> : MipsReg<Enc, n>;
 
 // Mips 64-bit CPU Registers
-class Mips64GPRReg<bits<5> num, string n, list<Register> subregs>
-  : MipsRegWithSubRegs<n, subregs> {
-  let Num = num;
+class Mips64GPRReg<bits<16> Enc, string n, list<Register> subregs>
+  : MipsRegWithSubRegs<Enc, n, subregs> {
   let SubRegIndices = [sub_32];
 }
 
 // Mips 32-bit FPU Registers
-class FPR<bits<5> num, string n> : MipsReg<n> {
-  let Num = num;
-}
+class FPR<bits<16> Enc, string n> : MipsReg<Enc, n>;
 
 // Mips 64-bit (aliased) FPU Registers
-class AFPR<bits<5> num, string n, list<Register> subregs>
-  : MipsRegWithSubRegs<n, subregs> {
-  let Num = num;
+class AFPR<bits<16> Enc, string n, list<Register> subregs>
+  : MipsRegWithSubRegs<Enc, n, subregs> {
   let SubRegIndices = [sub_fpeven, sub_fpodd];
   let CoveredBySubRegs = 1;
 }
 
-class AFPR64<bits<5> num, string n, list<Register> subregs>
-  : MipsRegWithSubRegs<n, subregs> {
-  let Num = num;
+class AFPR64<bits<16> Enc, string n, list<Register> subregs>
+  : MipsRegWithSubRegs<Enc, n, subregs> {
   let SubRegIndices = [sub_32];
 }
 
 // Mips Hardware Registers
-class HWR<bits<5> num, string n> : MipsReg<n> {
-  let Num = num;
-}
+class HWR<bits<16> Enc, string n> : MipsReg<Enc, n>;
 
 //===----------------------------------------------------------------------===//
 //  Registers
@@ -239,21 +230,21 @@ let Namespace = "Mips" in {
   def FCR31 : Register<"31">;
 
   // fcc0 register
-  def FCC0 : Register<"fcc0">;
+  def FCC0 : MipsReg<0, "fcc0">;
 
   // PC register
   def PC : Register<"pc">;
 
   // Hardware register $29
-  def HWR29 : Register<"29">;
-  def HWR29_64 : Register<"29">;
+  def HWR29 : MipsReg<29, "29">;
+  def HWR29_64 : MipsReg<29, "29">;
 
   // Accum registers
   let SubRegIndices = [sub_lo, sub_hi] in
-  def AC0 : RegisterWithSubRegs<"ac0", [LO, HI]>;
-  def AC1 : Register<"ac1">;
-  def AC2 : Register<"ac2">;
-  def AC3 : Register<"ac3">;
+  def AC0 : MipsRegWithSubRegs<0, "ac0", [LO, HI]>;
+  def AC1 : MipsReg<1, "ac1">;
+  def AC2 : MipsReg<2, "ac2">;
+  def AC3 : MipsReg<3, "ac3">;
 
   def DSPCtrl : Register<"dspctrl">;
 }
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 03f5176b29..f7603a9d74 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -12,11 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "MipsSEFrameLowering.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
 #include "MipsAnalyzeImmediate.h"
-#include "MipsSEInstrInfo.h"
 #include "MipsMachineFunction.h"
-#include "MCTargetDesc/MipsBaseInfo.h"
-#include "llvm/Function.h"
+#include "MipsSEInstrInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -24,8 +23,9 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Function.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index fb0f9df038..a714411f22 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -12,14 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "MipsSEInstrInfo.h"
-#include "MipsTargetMachine.h"
-#include "MipsMachineFunction.h"
 #include "InstPrinter/MipsInstPrinter.h"
+#include "MipsMachineFunction.h"
+#include "MipsTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 56b9ba95e5..44b1827f8d 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -15,28 +15,28 @@
 #include "MipsSERegisterInfo.h"
 #include "Mips.h"
 #include "MipsAnalyzeImmediate.h"
+#include "MipsMachineFunction.h"
 #include "MipsSEInstrInfo.h"
 #include "MipsSubtarget.h"
-#include "MipsMachineFunction.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Constants.h"
 #include "llvm/DebugInfo.h"
-#include "llvm/Type.h"
 #include "llvm/Function.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Type.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index f464d71fa6..88e2b44791 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -30,8 +30,8 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
   MipsGenSubtargetInfo(TT, CPU, FS),
   MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
   IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
-  IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false),
-  HasMinMax(false), HasSwap(false), HasBitCount(false), HasFPIdx(false),
+  IsLinux(true), HasSEInReg(false), HasCondMov(false), HasSwap(false),
+  HasBitCount(false), HasFPIdx(false),
   InMips16Mode(false), HasDSP(false), HasDSPR2(false), IsAndroid(false)
   // @LOCALMOD-START
   , TargetTriple(TT)
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 44678daaa9..664a5c703b 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -14,8 +14,8 @@
 #ifndef MIPSSUBTARGET_H
 #define MIPSSUBTARGET_H
 
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
 
 #define GET_SUBTARGETINFO_HEADER
@@ -76,13 +76,6 @@ protected:
   // HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
   bool HasCondMov;
 
-  // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu)
-  // instructions.
-  bool HasMulDivAdd;
-
-  // HasMinMax - MIN and MAX instructions.
-  bool HasMinMax;
-
   // HasSwap - Byte and half swap instructions.
   bool HasSwap;
 
@@ -151,8 +144,6 @@ public:
   /// Features related to the presence of specific instructions.
   bool hasSEInReg()   const { return HasSEInReg; }
   bool hasCondMov()   const { return HasCondMov; }
-  bool hasMulDivAdd() const { return HasMulDivAdd; }
-  bool hasMinMax()    const { return HasMinMax; }
   bool hasSwap()      const { return HasSwap; }
   bool hasBitCount()  const { return HasBitCount; }
   bool hasFPIdx()     const { return HasFPIdx; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 0ed3277306..83b58a9b53 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -15,8 +15,8 @@
 #include "Mips.h"
 #include "MipsFrameLowering.h"
 #include "MipsInstrInfo.h"
-#include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index b54f5cee6d..8b1a06f320 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -15,14 +15,14 @@
 #define MIPSTARGETMACHINE_H
 
 #include "MipsFrameLowering.h"
-#include "MipsInstrInfo.h"
 #include "MipsISelLowering.h"
+#include "MipsInstrInfo.h"
 #include "MipsJITInfo.h"
 #include "MipsSelectionDAGInfo.h"
 #include "MipsSubtarget.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetTransformImpl.h"
 
 namespace llvm {
diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp
index 211e6867a7..59d07dae12 100644
--- a/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -9,14 +9,14 @@
 
 #include "MipsTargetObjectFile.h"
 #include "MipsSubtarget.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 static cl::opt<unsigned>
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index a8d082a4d8..b29a2962aa 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -15,11 +15,11 @@
 #ifndef LLVM_TARGET_NVPTX_H
 #define LLVM_TARGET_NVPTX_H
 
-#include "llvm/Value.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
 #include "llvm/Module.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetMachine.h"
-#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "llvm/Value.h"
 #include <cassert>
 #include <iosfwd>
 
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 668c39308f..38c7083fea 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -11,10 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "NVPTXAllocaHoisting.h"
+#include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
-#include "llvm/Constants.h"
-#include "NVPTXAllocaHoisting.h"
 
 namespace llvm {
 
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index c7cabf6953..768d514dbb 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -15,8 +15,8 @@
 #define NVPTX_ALLOCA_HOISTING_H_
 
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/Pass.h"
 #include "llvm/DataLayout.h"
+#include "llvm/Pass.h"
 
 namespace llvm {
 
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 31ab68158c..af07576d59 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -13,36 +13,36 @@
 //===----------------------------------------------------------------------===//
 
 #include "NVPTXAsmPrinter.h"
+#include "MCTargetDesc/NVPTXMCAsmInfo.h"
 #include "NVPTX.h"
 #include "NVPTXInstrInfo.h"
-#include "NVPTXTargetMachine.h"
+#include "NVPTXNumRegisters.h"
 #include "NVPTXRegisterInfo.h"
+#include "NVPTXTargetMachine.h"
 #include "NVPTXUtilities.h"
-#include "MCTargetDesc/NVPTXMCAsmInfo.h"
-#include "NVPTXNumRegisters.h"
+#include "cl_common_defines.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Module.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Support/TimeValue.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Assembly/Writer.h"
-#include "cl_common_defines.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TimeValue.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
 #include <sstream>
 using namespace llvm;
 
@@ -1487,7 +1487,7 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F,
                                             raw_ostream &O) {
   const DataLayout *TD = TM.getDataLayout();
-  const AttrListPtr &PAL = F->getAttributes();
+  const AttributeSet &PAL = F->getAttributes();
   const TargetLowering *TLI = TM.getTargetLowering();
   Function::const_arg_iterator I, E;
   unsigned paramIndex = 0;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 6488b14425..29db4abe6c 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -16,19 +16,19 @@
 #define NVPTXASMPRINTER_H
 
 #include "NVPTX.h"
-#include "NVPTXTargetMachine.h"
 #include "NVPTXSubtarget.h"
-#include "llvm/Function.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/TargetMachine.h"
 #include <fstream>
 
 // The ptx syntax and format is very different from that usually seem in a .s
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index a9abc00bf3..50072c55f6 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -17,9 +17,9 @@
 #include "NVPTXSubtarget.h"
 #include "NVPTXTargetMachine.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 4e92f0e785..d68e3b6871 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -12,14 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 
-#include "llvm/Instructions.h"
-#include "llvm/Support/raw_ostream.h"
 #include "NVPTXISelDAGToDAG.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/GlobalValue.h"
 
 #undef DEBUG_TYPE
 #define DEBUG_TYPE "nvptx-isel"
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index ccd69b29dd..29e4f17962 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -18,8 +18,8 @@
 #include "NVPTXRegisterInfo.h"
 #include "NVPTXTargetMachine.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Intrinsics.h"
+#include "llvm/Support/Compiler.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f1a99d77be..4e9d68687a 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -12,29 +12,29 @@
 //===----------------------------------------------------------------------===//
 
 
-#include "NVPTX.h"
 #include "NVPTXISelLowering.h"
+#include "NVPTX.h"
 #include "NVPTXTargetMachine.h"
 #include "NVPTXTargetObjectFile.h"
 #include "NVPTXUtilities.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalValue.h"
-#include "llvm/Module.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/MC/MCSectionELF.h"
 #include <sstream>
 
 #undef DEBUG_TYPE
@@ -271,6 +271,9 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
   }
 }
 
+bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const {
+  return VT == MVT::i1;
+}
 
 SDValue
 NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
@@ -954,7 +957,7 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
     return false;
 
   const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
-  const std::string TypeName = STy ? STy->getName() : "";
+  const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
 
   for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
     if (TypeName == specialTypes[i])
@@ -973,7 +976,7 @@ NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
   const DataLayout *TD = getDataLayout();
 
   const Function *F = MF.getFunction();
-  const AttrListPtr &PAL = F->getAttributes();
+  const AttributeSet &PAL = F->getAttributes();
 
   SDValue Root = DAG.getRoot();
   std::vector<SDValue> OutChains;
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 94a177ceb0..0a1833a7c9 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -92,6 +92,8 @@ public:
   virtual unsigned getFunctionAlignment(const Function *F) const;
 
   virtual EVT getSetCCResultType(EVT VT) const {
+    if (VT.isVector())
+      return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
     return MVT::i1;
   }
 
@@ -129,6 +131,8 @@ public:
     return MVT::i32;
   }
 
+  virtual bool shouldSplitVectorElementType(EVT VT) const;
+
 private:
   const NVPTXSubtarget &nvptxSubtarget;  // cache the subtarget here
 
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 9273931e99..8e40c965bf 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -13,6 +13,7 @@
 
 #include "NVPTXLowerAggrCopies.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Function.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Instructions.h"
@@ -21,7 +22,6 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Support/InstIterator.h"
-#include "llvm/DataLayout.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index b150c69815..9bafce274e 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -15,9 +15,9 @@
 #ifndef NVPTX_LOWER_AGGR_COPIES_H
 #define NVPTX_LOWER_AGGR_COPIES_H
 
-#include "llvm/Pass.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/DataLayout.h"
+#include "llvm/Pass.h"
 
 namespace llvm {
 
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index e3cd46f063..08be917bb7 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -13,13 +13,13 @@
 
 #define DEBUG_TYPE "nvptx-reg-info"
 
-#include "NVPTX.h"
 #include "NVPTXRegisterInfo.h"
+#include "NVPTX.h"
 #include "NVPTXSubtarget.h"
 #include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/Target/TargetInstrInfo.h"
 
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index f1ca466266..9c832e1b51 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -14,8 +14,8 @@
 #ifndef LLVM_NVPTXSECTION_H
 #define LLVM_NVPTXSECTION_H
 
-#include "llvm/MC/MCSection.h"
 #include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCSection.h"
 #include <vector>
 
 namespace llvm {
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
index 2836cad4f0..6171292eeb 100644
--- a/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp
@@ -11,13 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "NVPTXSplitBBatBar.h"
+#include "NVPTXUtilities.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/InstIterator.h"
-#include "NVPTXUtilities.h"
-#include "NVPTXSplitBBatBar.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/NVPTX/NVPTXSplitBBatBar.h b/lib/Target/NVPTX/NVPTXSplitBBatBar.h
index 9e4d5a066d..bdafba9075 100644
--- a/lib/Target/NVPTX/NVPTXSplitBBatBar.h
+++ b/lib/Target/NVPTX/NVPTXSplitBBatBar.h
@@ -15,8 +15,8 @@
 #ifndef NVPTX_SPLIT_BB_AT_BAR_H
 #define NVPTX_SPLIT_BB_AT_BAR_H
 
-#include "llvm/Pass.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/Pass.h"
 
 namespace llvm {
 
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index 3cfd9718e5..e6cb7c2def 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -14,8 +14,8 @@
 #ifndef NVPTXSUBTARGET_H
 #define NVPTXSUBTARGET_H
 
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "NVPTX.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 
 #define GET_SUBTARGETINFO_HEADER
 #include "NVPTXGenSubtargetInfo.inc"
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index cbb490003d..0867a4e01e 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -12,27 +12,31 @@
 //===----------------------------------------------------------------------===//
 
 #include "NVPTXTargetMachine.h"
-#include "NVPTX.h"
-#include "NVPTXSplitBBatBar.h"
-#include "NVPTXLowerAggrCopies.h"
 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
+#include "NVPTX.h"
 #include "NVPTXAllocaHoisting.h"
-#include "llvm/PassManager.h"
+#include "NVPTXLowerAggrCopies.h"
+#include "NVPTXSplitBBatBar.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
@@ -41,10 +45,6 @@
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetRegistry.h"
 
 
 using namespace llvm;
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 11bc9d4fa6..b4763a5bae 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -15,12 +15,12 @@
 #ifndef NVPTX_TARGETMACHINE_H
 #define NVPTX_TARGETMACHINE_H
 
-#include "NVPTXInstrInfo.h"
+#include "ManagedStringPool.h"
+#include "NVPTXFrameLowering.h"
 #include "NVPTXISelLowering.h"
+#include "NVPTXInstrInfo.h"
 #include "NVPTXRegisterInfo.h"
 #include "NVPTXSubtarget.h"
-#include "NVPTXFrameLowering.h"
-#include "ManagedStringPool.h"
 #include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 3f52251cc1..fff3f43bb0 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -12,10 +12,10 @@
 
 #include "NVPTXUtilities.h"
 #include "NVPTX.h"
-#include "llvm/GlobalVariable.h"
+#include "llvm/Constants.h"
 #include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/Module.h"
-#include "llvm/Constants.h"
 #include "llvm/Operator.h"
 #include <algorithm>
 #include <cstring>
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index fe6ad559e9..5ea7bc8640 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -14,10 +14,10 @@
 #ifndef NVPTXUTILITIES_H
 #define NVPTXUTILITIES_H
 
-#include "llvm/Value.h"
-#include "llvm/GlobalVariable.h"
 #include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Value.h"
 #include <cstdarg>
 #include <set>
 #include <string>
diff --git a/lib/Target/NVPTX/VectorElementize.cpp b/lib/Target/NVPTX/VectorElementize.cpp
index 8043e2de09..e67e2e42b3 100644
--- a/lib/Target/NVPTX/VectorElementize.cpp
+++ b/lib/Target/NVPTX/VectorElementize.cpp
@@ -34,24 +34,24 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "NVPTX.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Constant.h"
-#include "llvm/Instructions.h"
 #include "llvm/Function.h"
+#include "llvm/Instructions.h"
 #include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "NVPTX.h"
-#include "NVPTXTargetMachine.h"
+#include "llvm/Type.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 187c6b35af..631f749581 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -13,8 +13,8 @@
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCFixupKindInfo.h"
 #include "llvm/MC/MCMachObjectWriter.h"
-#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCValue.h"
 #include "llvm/Object/MachOFormat.h"
 #include "llvm/Support/ELF.h"
@@ -31,6 +31,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
   case FK_Data_4:
   case FK_Data_8:
   case PPC::fixup_ppc_toc:
+  case PPC::fixup_ppc_tlsreg:
     return Value;
   case PPC::fixup_ppc_lo14:
   case PPC::fixup_ppc_toc16_ds:
@@ -83,7 +84,8 @@ public:
       { "fixup_ppc_lo14",        16,     14,   0 },
       { "fixup_ppc_toc",          0,     64,   0 },
       { "fixup_ppc_toc16",       16,     16,   0 },
-      { "fixup_ppc_toc16_ds",    16,     14,   0 }
+      { "fixup_ppc_toc16_ds",    16,     14,   0 },
+      { "fixup_ppc_tlsreg",       0,      0,   0 }
     };
 
     if (Kind < FirstTargetFixupKind)
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index dc93f7124a..462d7072b5 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -7,12 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/PPCFixupKinds.h"
 #include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
 #include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
 
@@ -82,6 +82,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
       case MCSymbolRefExpr::VK_None:
         Type = ELF::R_PPC_ADDR16_HA;
 	break;
+      case MCSymbolRefExpr::VK_PPC_TOC16_HA:
+        Type = ELF::R_PPC64_TOC16_HA;
+        break;
       }
       break;
     case PPC::fixup_ppc_lo16:
@@ -93,6 +96,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
       case MCSymbolRefExpr::VK_None:
         Type = ELF::R_PPC_ADDR16_LO;
 	break;
+      case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+        Type = ELF::R_PPC64_TOC16_LO;
+        break;
       }
       break;
     case PPC::fixup_ppc_lo14:
@@ -105,7 +111,21 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
       Type = ELF::R_PPC64_TOC16;
       break;
     case PPC::fixup_ppc_toc16_ds:
-      Type = ELF::R_PPC64_TOC16_DS;
+      switch (Modifier) {
+      default: llvm_unreachable("Unsupported Modifier");
+      case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+        Type = ELF::R_PPC64_TOC16_DS;
+	break;
+      case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+        Type = ELF::R_PPC64_TOC16_LO_DS;
+        break;
+      case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS:
+        Type = ELF::R_PPC64_GOT_TPREL16_DS;
+        break;
+      }
+      break;
+    case PPC::fixup_ppc_tlsreg:
+      Type = ELF::R_PPC64_TLS;
       break;
     case FK_Data_8:
       switch (Modifier) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 37b265e7fd..75bb851630 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -44,6 +44,9 @@ enum Fixups {
   /// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with
   /// implied 2 zero bits
   fixup_ppc_toc16_ds,
+
+  /// fixup_ppc_tlsreg - Insert thread-pointer register number.
+  fixup_ppc_tlsreg,
   
   // Marker
   LastTargetFixupKind,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 21183024a5..5b208d41f4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -12,15 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
 #include "MCTargetDesc/PPCBaseInfo.h"
 #include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
@@ -61,6 +62,10 @@ public:
                             SmallVectorImpl<MCFixup> &Fixups) const;
   unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
                              SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getTLSOffsetEncoding(const MCInst &MI, unsigned OpNo,
+                                SmallVectorImpl<MCFixup> &Fixups) const;
+  unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+                             SmallVectorImpl<MCFixup> &Fixups) const;
   unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
                                SmallVectorImpl<MCFixup> &Fixups) const;
 
@@ -194,6 +199,31 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
 }
 
 
+unsigned PPCMCCodeEmitter::getTLSOffsetEncoding(const MCInst &MI, unsigned OpNo,
+                                       SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  
+  // Add a fixup for the GOT displacement to the TLS block offset.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_toc16_ds));
+  return 0;
+}
+
+
+unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+                                       SmallVectorImpl<MCFixup> &Fixups) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups);
+  
+  // Add a fixup for the TLS register, which simply provides a relocation
+  // hint to the linker that this statement is part of a relocation sequence.
+  // Return the thread-pointer register's encoding.
+  Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+                                   (MCFixupKind)PPC::fixup_ppc_tlsreg));
+  return getPPCRegisterNumbering(PPC::X13);
+}
+
+
 unsigned PPCMCCodeEmitter::
 get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
                     SmallVectorImpl<MCFixup> &Fixups) const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 6568e82e2b..2209f936ec 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -12,14 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCMCTargetDesc.h"
-#include "PPCMCAsmInfo.h"
 #include "InstPrinter/PPCInstPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "PPCMCAsmInfo.h"
 #include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 
@@ -70,7 +70,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
 
   // Initial state of the frame pointer is R1.
   MachineLocation Dst(MachineLocation::VirtualFP);
-  MachineLocation Src(PPC::R1, 0);
+  MachineLocation Src(isPPC64? PPC::X1 : PPC::R1, 0);
   MAI->addInitialFrameState(0, Dst, Src);
 
   return MAI;
@@ -88,6 +88,11 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
     else
       RM = Reloc::Static;
   }
+  if (CM == CodeModel::Default) {
+    Triple T(TT);
+    if (!T.isOSDarwin() && T.getArch() == Triple::ppc64)
+      CM = CodeModel::Medium;
+  }
   X->InitMCCodeGenInfo(RM, CM, OL);
   return X;
 }
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 9103e12325..bbd247f20f 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -65,13 +65,16 @@ namespace llvm {
     MO_NLP_HIDDEN_FLAG = 16,
 
     /// The next are not flags but distinct values.
-    MO_ACCESS_MASK = 224,
+    MO_ACCESS_MASK = 0xe0,
 
     /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
-    MO_LO16 = 32, MO_HA16 = 64,
+    MO_LO16 = 1 << 5,
+    MO_HA16 = 2 << 5,
 
-    MO_TPREL16_HA = 96,
-    MO_TPREL16_LO = 128
+    MO_TPREL16_HA = 3 << 5,
+    MO_TPREL16_LO = 4 << 5,
+    MO_GOT_TPREL16_DS = 5 << 5,
+    MO_TLS = 6 << 5
   };
   } // end namespace PPCII
   
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 3900c8bab4..4315bc1a07 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -18,14 +18,13 @@
 
 #define DEBUG_TYPE "asmprinter"
 #include "PPC.h"
-#include "PPCTargetMachine.h"
-#include "PPCSubtarget.h"
 #include "InstPrinter/PPCInstPrinter.h"
 #include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -33,29 +32,30 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Constants.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/MapVector.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 namespace {
@@ -73,6 +73,7 @@ namespace {
       return "PowerPC Assembly Printer";
     }
 
+    MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
 
     virtual void EmitInstruction(const MachineInstr *MI);
 
@@ -310,6 +311,25 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
 }
 
 
+/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
+/// exists for it.  If not, create one.  Then return a symbol that references
+/// the TOC entry.
+MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
+
+  MCSymbol *&TOCEntry = TOC[Sym];
+
+  // To avoid name clash check if the name already exists.
+  while (TOCEntry == 0) {
+    if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+                                "C" + Twine(TOCLabelID++)) == 0) {
+      TOCEntry = GetTempSymbol("C", TOCLabelID);
+    }
+  }
+
+  return TOCEntry;
+}
+
+
 /// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
 /// the current output stream.
 ///
@@ -379,14 +399,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
       MOSymbol = GetCPISymbol(MO.getIndex());
     else if (MO.isJTI())
       MOSymbol = GetJTISymbol(MO.getIndex());
-    MCSymbol *&TOCEntry = TOC[MOSymbol];
-    // To avoid name clash check if the name already exists.
-    while (TOCEntry == 0) {
-      if (OutContext.LookupSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
-                                  "C" + Twine(TOCLabelID++)) == 0) {
-        TOCEntry = GetTempSymbol("C", TOCLabelID);
-      }
-    }
+
+    MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
 
     const MCExpr *Exp =
       MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY,
@@ -396,6 +410,126 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
       
+  case PPC::ADDIStocHA: {
+    // Transform %Xd = ADDIStocHA %X2, <ga:@sym>
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+    // Change the opcode to ADDIS8.  If the global address is external,
+    // has common linkage, is a function address, or is a jump table
+    // address, then generate a TOC entry and reference that.  Otherwise
+    // reference the symbol directly.
+    TmpInst.setOpcode(PPC::ADDIS8);
+    const MachineOperand &MO = MI->getOperand(2);
+    assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
+           "Invalid operand for ADDIStocHA!");
+    MCSymbol *MOSymbol = 0;
+    bool IsExternal = false;
+    bool IsFunction = false;
+    bool IsCommon = false;
+
+    if (MO.isGlobal()) {
+      const GlobalValue *GValue = MO.getGlobal();
+      MOSymbol = Mang->getSymbol(GValue);
+      const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue);
+      IsExternal = GVar && !GVar->hasInitializer();
+      IsCommon = GVar && GValue->hasCommonLinkage();
+      IsFunction = !GVar;
+    } else if (MO.isCPI())
+      MOSymbol = GetCPISymbol(MO.getIndex());
+    else if (MO.isJTI())
+      MOSymbol = GetJTISymbol(MO.getIndex());
+
+    if (IsExternal || IsFunction || IsCommon || MO.isJTI())
+      MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+    const MCExpr *Exp =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_HA,
+                              OutContext);
+    TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case PPC::LDtocL: {
+    // Transform %Xd = LDtocL <ga:@sym>, %Xs
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+    // Change the opcode to LDrs, which is a form of LD with the offset
+    // specified by a SymbolLo.  If the global address is external, has
+    // common linkage, or is a jump table address, then reference the
+    // associated TOC entry.  Otherwise reference the symbol directly.
+    TmpInst.setOpcode(PPC::LDrs);
+    const MachineOperand &MO = MI->getOperand(1);
+    assert((MO.isGlobal() || MO.isJTI()) && "Invalid operand for LDtocL!");
+    MCSymbol *MOSymbol = 0;
+
+    if (MO.isJTI())
+      MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+    else {
+      const GlobalValue *GValue = MO.getGlobal();
+      MOSymbol = Mang->getSymbol(GValue);
+      const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue);
+    
+      if (!GVar || !GVar->hasInitializer() || GValue->hasCommonLinkage())
+        MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+    }
+
+    const MCExpr *Exp =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+                              OutContext);
+    TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case PPC::ADDItocL: {
+    // Transform %Xd = ADDItocL %Xs, <ga:@sym>
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+    // Change the opcode to ADDI8L.  If the global address is external, then
+    // generate a TOC entry and reference that.  Otherwise reference the
+    // symbol directly.
+    TmpInst.setOpcode(PPC::ADDI8L);
+    const MachineOperand &MO = MI->getOperand(2);
+    assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
+    MCSymbol *MOSymbol = 0;
+    bool IsExternal = false;
+    bool IsFunction = false;
+
+    if (MO.isGlobal()) {
+      const GlobalValue *GValue = MO.getGlobal();
+      MOSymbol = Mang->getSymbol(GValue);
+      const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue);
+      IsExternal = GVar && !GVar->hasInitializer();
+      IsFunction = !GVar;
+    } else if (MO.isCPI())
+      MOSymbol = GetCPISymbol(MO.getIndex());
+
+    if (IsFunction || IsExternal)
+      MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+    const MCExpr *Exp =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+                              OutContext);
+    TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
+  case PPC::LDgotTPREL: {
+    // Transform %Xd = LDgotTPREL <ga:@sym>, %Xs
+    LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+    // Change the opcode to LDrs, which is a form of LD with the offset
+    // specified by a SymbolLo.
+    TmpInst.setOpcode(PPC::LDrs);
+    const MachineOperand &MO = MI->getOperand(1);
+    const GlobalValue *GValue = MO.getGlobal();
+    MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+    const MCExpr *Exp =
+      MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS,
+                              OutContext);
+    TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+    OutStreamer.EmitInstruction(TmpInst);
+    return;
+  }
   case PPC::MFCRpseud:
   case PPC::MFCR8pseud:
     // Transform: %R3 = MFCRpseud %CR7
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 21a0fb200f..99115752f7 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -17,13 +17,13 @@
 
 #define DEBUG_TYPE "ppc-branch-select"
 #include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
 #include "PPCInstrBuilder.h"
 #include "PPCInstrInfo.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 STATISTIC(NumExpanded, "Number of branches expanded to long format");
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 2a2abb171f..05db2c5e73 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -31,20 +31,20 @@
 
 #define DEBUG_TYPE "ctrloops"
 #include "PPC.h"
-#include "PPCTargetMachine.h"
 #include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/Constants.h"
-#include "llvm/PassSupport.h"
+#include "PPCTargetMachine.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Constants.h"
+#include "llvm/PassSupport.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 252a2d159e..0dcb5deb5d 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -12,15 +12,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "PPCTargetMachine.h"
-#include "PPCRelocations.h"
 #include "PPC.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetOptions.h"
@@ -68,6 +68,8 @@ namespace {
     unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
     unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
     unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getTLSOffsetEncoding(const MachineInstr &MI, unsigned OpNo) const;
+    unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
 
     const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
 
@@ -243,6 +245,20 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
 }
 
 
+unsigned PPCCodeEmitter::getTLSOffsetEncoding(const MachineInstr &MI,
+                                           unsigned OpNo) const {
+  llvm_unreachable("TLS not supported on the old JIT.");
+  return 0;
+}
+
+
+unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
+                                           unsigned OpNo) const {
+  llvm_unreachable("TLS not supported on the old JIT.");
+  return 0;
+}
+
+
 unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
                                            const MachineOperand &MO) const {
 
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index caf7bf2be7..fdd85ff11f 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -12,16 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCFrameLowering.h"
-#include "PPCInstrInfo.h"
 #include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
 #include "PPCMachineFunctionInfo.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Function.h"
 #include "llvm/Target/TargetOptions.h"
 
 using namespace llvm;
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 4d957b91c7..3517d8c086 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -15,9 +15,9 @@
 
 #include "PPC.h"
 #include "PPCSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/STLExtras.h"
 
 namespace llvm {
   class PPCSubtarget;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 254fea67fc..abcaa9cab0 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -14,22 +14,23 @@
 
 #define DEBUG_TYPE "ppc-codegen"
 #include "PPC.h"
-#include "PPCTargetMachine.h"
 #include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "PPCTargetMachine.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalValue.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 namespace {
@@ -1268,6 +1269,53 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
                                            Chain), 0);
     return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
   }
+  case PPCISD::TOC_ENTRY: {
+    assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
+
+    // For medium code model, we generate two instructions as described
+    // below.  Otherwise we allow SelectCodeCommon to handle this, selecting
+    // one of LDtoc, LDtocJTI, and LDtocCPT.
+    if (TM.getCodeModel() != CodeModel::Medium)
+      break;
+
+    // The first source operand is a TargetGlobalAddress or a
+    // TargetJumpTable.  If it is an externally defined symbol, a symbol
+    // with common linkage, a function address, or a jump table address,
+    // we generate:
+    //   LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
+    // Otherwise we generate:
+    //   ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
+    SDValue GA = N->getOperand(0);
+    SDValue TOCbase = N->getOperand(1);
+    SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
+                                        TOCbase, GA);
+
+    if (isa<JumpTableSDNode>(GA))
+      return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+                                    SDValue(Tmp, 0));
+
+    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
+      const GlobalValue *GValue = G->getGlobal();
+      const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GValue);
+      assert((GVar || isa<Function>(GValue)) &&
+             "Unexpected global value subclass!");
+
+      // An external variable is one without an initializer.  For these,
+      // for variables with common linkage, and for Functions, generate
+      // the LDtocL form.
+      if (!GVar || !GVar->hasInitializer() || GValue->hasCommonLinkage())
+        return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+                                      SDValue(Tmp, 0));
+    }
+
+    return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
+                                  SDValue(Tmp, 0), GA);
+  }
+  case PPCISD::LD_GOT_TPREL: {
+    assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
+    return CurDAG->getMachineNode(PPC::LDgotTPREL, dl, MVT::i64, 
+                                  N->getOperand(0), N->getOperand(1));
+  }
   }
 
   return SelectCode(N);
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 7d97450676..1168171b23 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12,16 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCISelLowering.h"
+#include "MCTargetDesc/PPCPredicates.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCPerfectShuffle.h"
 #include "PPCTargetMachine.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/CallingConv.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -29,6 +25,10 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
@@ -575,6 +575,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
+  case PPCISD::ADDIS_TOC_HA:    return "PPCISD::ADDIS_TOC_HA";
+  case PPCISD::LD_TOC_L:        return "PPCISD::LD_TOC_L";
+  case PPCISD::ADDI_TOC_L:      return "PPCISD::ADDI_TOC_L";
+  case PPCISD::LD_GOT_TPREL:    return "PPCISD::LD_GOT_TPREL";
+  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
   }
 }
 
@@ -1321,19 +1326,34 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
   EVT PtrVT = getPointerTy();
   bool is64bit = PPCSubTarget.isPPC64();
 
-  TLSModel::Model model = getTargetMachine().getTLSModel(GV);
+  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+
+  if (Model == TLSModel::LocalExec) {
+    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                               PPCII::MO_TPREL16_HA);
+    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                               PPCII::MO_TPREL16_LO);
+    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+                                     is64bit ? MVT::i64 : MVT::i32);
+    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+  }
+
+  if (!is64bit)
+    llvm_unreachable("only local-exec is currently supported for ppc32");
 
-  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
-                                             PPCII::MO_TPREL16_HA);
-  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
-                                             PPCII::MO_TPREL16_LO);
+  if (Model != TLSModel::InitialExec)
+    llvm_unreachable("only local-exec and initial-exec TLS modes supported");
 
-  if (model != TLSModel::LocalExec)
-    llvm_unreachable("only local-exec TLS mode supported");
-  SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+  SDValue GOTOffset = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                                 PPCII::MO_GOT_TPREL16_DS);
+  SDValue TPReg = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+                                             PPCII::MO_TLS);
+  SDValue GOTReg = DAG.getRegister(is64bit ? PPC::X2  : PPC::R2,
                                    is64bit ? MVT::i64 : MVT::i32);
-  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
-  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+  SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL, dl, PtrVT,
+                                 GOTOffset, GOTReg);
+  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TPReg);
 }
 
 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index b3c7f9c28d..f54a8b77a4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -17,8 +17,8 @@
 
 #include "PPC.h"
 #include "PPCSubtarget.h"
-#include "llvm/Target/TargetLowering.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
 
 namespace llvm {
   namespace PPCISD {
@@ -178,6 +178,20 @@ namespace llvm {
       CR6SET,
       CR6UNSET,
 
+      /// G8RC = LD_GOT_TPREL Symbol, G8RReg - Used by the initial-exec
+      /// TLS model, produces a LD instruction with base register G8RReg
+      /// and offset sym@got@tprel.  The latter identifies the GOT entry
+      /// containing the offset of "sym" relative to the thread pointer.
+      LD_GOT_TPREL,
+
+      /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
+      /// model, produces an ADD instruction that adds the contents of
+      /// G8RReg to the thread pointer.  Symbol contains a relocation
+      /// sym@tls which is to be replaced by the thread pointer and
+      /// identifies to the linker that the instruction is part of a
+      /// TLS sequence.
+      ADD_TLS,
+
       /// STD_32 - This is the STD instruction for use with "32-bit" registers.
       STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,
 
@@ -191,7 +205,21 @@ namespace llvm {
       /// byte-swapping load instruction.  It loads "Type" bits, byte swaps it,
       /// then puts it in the bottom bits of the GPRC.  TYPE can be either i16
       /// or i32.
-      LBRX
+      LBRX,
+
+      /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium code model, produces
+      /// an ADDIS8 instruction that adds the TOC base register to sym@toc@ha.
+      ADDIS_TOC_HA,
+
+      /// G8RC = LD_TOC_L Symbol, G8RReg - For medium code model, produces a
+      /// LD instruction with base register G8RReg and offset sym@toc@l.
+      /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+      LD_TOC_L,
+
+      /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
+      /// an ADDI8 instruction that adds G8RReg to sym@toc@l.
+      /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+      ADDI_TOC_L
     };
   }
 
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 9711452ec4..dff15664a3 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -32,6 +32,17 @@ def symbolLo64 : Operand<i64> {
 def tocentry : Operand<iPTR> {
   let MIOperandInfo = (ops i32imm:$imm);
 }
+def memrs : Operand<iPTR> {   // memri where the immediate is a symbolLo64
+  let PrintMethod = "printMemRegImm";
+  let EncoderMethod = "getMemRIXEncoding";
+  let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg);
+}
+def tlsaddr : Operand<i64> {
+  let EncoderMethod = "getTLSOffsetEncoding";
+}
+def tlsreg : Operand<i64> {
+  let EncoderMethod = "getTLSRegEncoding";
+}
 
 //===----------------------------------------------------------------------===//
 // 64-bit transformation functions.
@@ -359,6 +370,11 @@ def XORIS8  : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
 def ADD8  : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
                      "add $rT, $rA, $rB", IntSimple,
                      [(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
+// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
+// initial-exec thread-local storage model.
+def ADD8TLS  : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
+                        "add $rT, $rA, $rB", IntSimple,
+                        [(set G8RC:$rT, (add G8RC:$rA, tglobaltlsaddr:$rB))]>;
                      
 let Defs = [CARRY] in {
 def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
@@ -625,6 +641,12 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in {
 def LD   : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src),
                     "ld $rD, $src", LdStLD,
                     [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64;
+def LDrs : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrs:$src),
+                    "ld $rD, $src", LdStLD,
+                    []>, isPPC64;
+// The following three definitions are selected for small code model only.
+// Otherwise, we need to create two instructions to form a 32-bit offset,
+// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
 def LDtoc: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
                   "#LDtoc",
                   [(set G8RC:$rD,
@@ -671,6 +693,30 @@ def : Pat<(PPCload ixaddr:$src),
 def : Pat<(PPCload xaddr:$src),
           (LDX xaddr:$src)>;
 
+// Support for medium code model.
+def ADDIStocHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp),
+                       "#ADDIStocHA",
+                       [(set G8RC:$rD,
+                         (PPCaddisTocHA G8RC:$reg, tglobaladdr:$disp))]>,
+                       isPPC64;
+def LDtocL: Pseudo<(outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg),
+                   "#LDtocL",
+                   [(set G8RC:$rD,
+                     (PPCldTocL tglobaladdr:$disp, G8RC:$reg))]>, isPPC64;
+def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp),
+                     "#ADDItocL",
+                     [(set G8RC:$rD,
+                       (PPCaddiTocL G8RC:$reg, tglobaladdr:$disp))]>, isPPC64;
+
+// Support for thread-local storage.
+def LDgotTPREL: Pseudo<(outs G8RC:$rD), (ins tlsaddr:$disp, G8RC:$reg),
+                       "#LDgotTPREL",
+                       [(set G8RC:$rD,
+                         (PPCldGotTprel G8RC:$reg, tglobaltlsaddr:$disp))]>,
+                      isPPC64;
+def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g),
+          (ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>;
+
 let PPC970_Unit = 2 in {
 // Truncating stores.                       
 def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 87758e90fb..0cf28ae4b5 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -158,10 +158,6 @@ def vecspltisw : PatLeaf<(build_vector), [{
   return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
 }], VSPLTISW_get_imm>;
 
-def V_immneg0 : PatLeaf<(build_vector), [{
-  return PPC::isAllNegativeZeroVector(N);
-}]>;
-
 //===----------------------------------------------------------------------===//
 // Helpers for defining instructions that directly correspond to intrinsics.
 
@@ -585,7 +581,12 @@ def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
 def V_SET0 : VXForm_setzero<1220, (outs VRRC:$vD), (ins),
                       "vxor $vD, $vD, $vD", VecFP,
                       [(set VRRC:$vD, (v4i32 immAllZerosV))]>;
+let IMM=-1 in {
+def V_SETALLONES : VXForm_3<908, (outs VRRC:$vD), (ins),
+                      "vspltisw $vD, -1", VecFP,
+                      [(set VRRC:$vD, (v4i32 immAllOnesV))]>;
 }
+} // VALU Operations.
 
 //===----------------------------------------------------------------------===//
 // Additional Altivec Patterns
@@ -672,7 +673,8 @@ def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
           (VANDC VRRC:$A, VRRC:$B)>;
 
 def : Pat<(fmul VRRC:$vA, VRRC:$vB),
-          (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>; 
+          (VMADDFP VRRC:$vA, VRRC:$vB,
+             (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>; 
 
 // Fused multiply add and multiply sub for packed float.  These are represented
 // separately from the real instructions above, for operations that must have
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index d9d68446f5..a0517a80a9 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -12,12 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
 #include "PPC.h"
+#include "PPCHazardRecognizers.h"
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCTargetMachine.h"
-#include "PPCHazardRecognizers.h"
-#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
@@ -28,7 +29,6 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/STLExtras.h"
 
 #define GET_INSTRINFO_CTOR
 #include "PPCGenInstrInfo.inc"
@@ -60,7 +60,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
     return new PPCScoreboardHazardRecognizer(II, DAG);
   }
 
-  return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+  return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
 }
 
 /// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
@@ -141,7 +141,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
 
   // Normal instructions can be commuted the obvious way.
   if (MI->getOpcode() != PPC::RLWIMI)
-    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+    return TargetInstrInfo::commuteInstruction(MI, NewMI);
 
   // Cannot commute if it has a non-zero rotate count.
   if (MI->getOperand(3).getImm() != 0)
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 6ee045a2c7..a29f40ad53 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -91,6 +91,9 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
 def PPCvmaddfp  : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
 def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
 
+def PPCldGotTprel : SDNode<"PPCISD::LD_GOT_TPREL", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCaddTls     : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
+
 def PPCvperm    : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
 
 // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
@@ -167,6 +170,12 @@ def PPClarx      : SDNode<"PPCISD::LARX", SDT_PPClarx,
 def PPCstcx      : SDNode<"PPCISD::STCX", SDT_PPCstcx,
                           [SDNPHasChain, SDNPMayStore]>;
 
+// Instructions to support medium code model
+def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>;
+def PPCldTocL     : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCaddiTocL   : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>;
+
+
 // Instructions to support dynamic alloca.
 def SDTDynOp  : SDTypeProfile<1, 2, []>;
 def PPCdynalloc   : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index aba27399d6..62d15b371f 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -16,9 +16,9 @@
 #include "PPCRelocations.h"
 #include "PPCTargetMachine.h"
 #include "llvm/Function.h"
-#include "llvm/Support/Memory.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 2f8243a597..46d4a08eb6 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -14,8 +14,8 @@
 #ifndef POWERPC_JITINFO_H
 #define POWERPC_JITINFO_H
 
-#include "llvm/Target/TargetJITInfo.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
 
 namespace llvm {
   class PPCTargetMachine;
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 19ec993ba0..1c4af901f7 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPC.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
@@ -20,7 +21,6 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
 using namespace llvm;
 
 static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
@@ -114,6 +114,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
                                break;
     case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
                                break;
+    case PPCII::MO_GOT_TPREL16_DS:
+      RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS;
+      break;
+    case PPCII::MO_TLS:
+      RefKind = MCSymbolRefExpr::VK_PPC_TLS;
+      break;
    }
 
   // FIXME: This isn't right, but we don't have a good way to express this in
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 1f00b3b2b0..8d1ba23c11 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -15,32 +15,32 @@
 #define DEBUG_TYPE "reginfo"
 #include "PPCRegisterInfo.h"
 #include "PPC.h"
+#include "PPCFrameLowering.h"
 #include "PPCInstrBuilder.h"
 #include "PPCMachineFunctionInfo.h"
-#include "PPCFrameLowering.h"
 #include "PPCSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Type.h"
 #include <cstdlib>
 
 #define GET_REGINFO_TARGET_DESC
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 9c8cb92cc7..71eff4c52a 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -12,12 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPCSubtarget.h"
-#include "PPCRegisterInfo.h"
 #include "PPC.h"
+#include "PPCRegisterInfo.h"
 #include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
 #include <cstdlib>
 
 #define GET_SUBTARGETINFO_TARGET_DESC
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index b9e22f43c3..416c0f3c34 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -14,9 +14,9 @@
 #ifndef POWERPCSUBTARGET_H
 #define POWERPCSUBTARGET_H
 
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
 
 #define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 3fc977ee2b..22a78c5f1f 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -13,13 +13,13 @@
 
 #include "PPCTargetMachine.h"
 #include "PPC.h"
-#include "llvm/PassManager.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 static cl::
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index c168433a71..0267ed1f23 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -15,14 +15,14 @@
 #define PPC_TARGETMACHINE_H
 
 #include "PPCFrameLowering.h"
-#include "PPCSubtarget.h"
-#include "PPCJITInfo.h"
-#include "PPCInstrInfo.h"
 #include "PPCISelLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCJITInfo.h"
 #include "PPCSelectionDAGInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/DataLayout.h"
 
 namespace llvm {
 
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 7bf8c3f85e..6123773d5f 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -14,14 +14,14 @@
 
 #define DEBUG_TYPE "delay-slot-filler"
 #include "Sparc.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
index 9a729bd870..1325b98cf0 100644
--- a/lib/Target/Sparc/FPMover.cpp
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -14,14 +14,14 @@
 #define DEBUG_TYPE "fpmover"
 #include "Sparc.h"
 #include "SparcSubtarget.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 STATISTIC(NumFpDs , "Number of instructions translated");
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 25548625e7..e14b3cbf16 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -16,15 +16,15 @@
 #include "Sparc.h"
 #include "SparcInstrInfo.h"
 #include "SparcTargetMachine.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index 716c79f43a..874a5d5c51 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -14,15 +14,15 @@
 #include "SparcFrameLowering.h"
 #include "SparcInstrInfo.h"
 #include "SparcMachineFunctionInfo.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Function.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetOptions.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 93710c4e0b..5beed7635f 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "SparcTargetMachine.h"
-#include "llvm/Intrinsics.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 8e5619e6bc..bc6e8ddfd7 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -13,11 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "SparcISelLowering.h"
-#include "SparcTargetMachine.h"
 #include "SparcMachineFunctionInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Module.h"
+#include "SparcTargetMachine.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -25,6 +22,9 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
 #include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
 
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index f8674d0bd6..39d7329f26 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -15,12 +15,12 @@
 #include "Sparc.h"
 #include "SparcMachineFunctionInfo.h"
 #include "SparcSubtarget.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
 
 #define GET_INSTRINFO_CTOR
 #include "SparcGenInstrInfo.inc"
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index ff8d3c533f..303e46213e 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -14,14 +14,14 @@
 #include "SparcRegisterInfo.h"
 #include "Sparc.h"
 #include "SparcSubtarget.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Type.h"
 
 #define GET_REGINFO_TARGET_DESC
 #include "SparcGenRegisterInfo.inc"
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 45c962471d..7c628d7c55 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -12,8 +12,8 @@
 
 #include "SparcTargetMachine.h"
 #include "Sparc.h"
-#include "llvm/PassManager.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 0fbe2d7cda..53cf276a3e 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -14,14 +14,14 @@
 #ifndef SPARCTARGETMACHINE_H
 #define SPARCTARGETMACHINE_H
 
-#include "SparcInstrInfo.h"
-#include "SparcISelLowering.h"
 #include "SparcFrameLowering.h"
+#include "SparcISelLowering.h"
+#include "SparcInstrInfo.h"
 #include "SparcSelectionDAGInfo.h"
 #include "SparcSubtarget.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetTransformImpl.h"
 
 namespace llvm {
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 393178a469..8571bc0fa1 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -14,11 +14,11 @@
 
 #include "llvm-c/Target.h"
 #include "llvm-c/Initialization.h"
+#include "llvm/DataLayout.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/PassManager.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/LLVMContext.h"
 #include <cstring>
 
 using namespace llvm;
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
deleted file mode 100644
index f1d1d07c38..0000000000
--- a/lib/Target/TargetInstrInfo.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/Support/ErrorHandling.h"
-#include <cctype>
-using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-//  TargetInstrInfo
-//
-// Methods that depend on CodeGen are implemented in
-// TargetInstrInfoImpl.cpp. Invoking them without linking libCodeGen raises a
-// link error.
-// ===----------------------------------------------------------------------===//
-
-TargetInstrInfo::~TargetInstrInfo() {
-}
-
-const TargetRegisterClass*
-TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
-                             const TargetRegisterInfo *TRI,
-                             const MachineFunction &MF) const {
-  if (OpNum >= MCID.getNumOperands())
-    return 0;
-
-  short RegClass = MCID.OpInfo[OpNum].RegClass;
-  if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
-    return TRI->getPointerRegClass(MF, RegClass);
-
-  // Instructions like INSERT_SUBREG do not have fixed register classes.
-  if (RegClass < 0)
-    return 0;
-
-  // Otherwise just look it up normally.
-  return TRI->getRegClass(RegClass);
-}
-
-/// insertNoop - Insert a noop into the instruction stream at the specified
-/// point.
-void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
-                                 MachineBasicBlock::iterator MI) const {
-  llvm_unreachable("Target didn't implement insertNoop!");
-}
-
-/// Measure the specified inline asm to determine an approximation of its
-/// length.
-/// Comments (which run till the next SeparatorString or newline) do not
-/// count as an instruction.
-/// Any other non-whitespace text is considered an instruction, with
-/// multiple instructions separated by SeparatorString or newlines.
-/// Variable-length instructions are not handled here; this function
-/// may be overloaded in the target code to do that.
-unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
-                                             const MCAsmInfo &MAI) const {
-
-
-  // Count the number of instructions in the asm.
-  bool atInsnStart = true;
-  unsigned Length = 0;
-  for (; *Str; ++Str) {
-    if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
-                                strlen(MAI.getSeparatorString())) == 0)
-      atInsnStart = true;
-    if (atInsnStart && !std::isspace(*Str)) {
-      Length += MAI.getMaxInstLength();
-      atInsnStart = false;
-    }
-    if (atInsnStart && strncmp(Str, MAI.getCommentString(),
-                               strlen(MAI.getCommentString())) == 0)
-      atInsnStart = false;
-  }
-
-  return Length;
-}
diff --git a/lib/Target/TargetIntrinsicInfo.cpp b/lib/Target/TargetIntrinsicInfo.cpp
index e049a1d3b6..8e8fd93801 100644
--- a/lib/Target/TargetIntrinsicInfo.cpp
+++ b/lib/Target/TargetIntrinsicInfo.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Function.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Function.h"
 using namespace llvm;
 
 TargetIntrinsicInfo::TargetIntrinsicInfo() {
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 62ce86e292..1ca47d023f 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalVariable.h"
@@ -21,13 +22,12 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index ffc1d9f0d1..efc873d55a 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/GlobalAlias.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index f69c2abd50..10c770b135 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -11,17 +11,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm-c/TargetMachine.h"
 #include "llvm-c/Core.h"
 #include "llvm-c/Target.h"
-#include "llvm-c/TargetMachine.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/FormattedStream.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
 #include <cassert>
 #include <cstdlib>
 #include <cstring>
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp
index b36e6f858f..7f9fdfc938 100644
--- a/lib/Target/TargetTransformImpl.cpp
+++ b/lib/Target/TargetTransformImpl.cpp
@@ -283,7 +283,7 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
  }
 
 unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const {
-  return 1;
+  return 0;
 }
 
 unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode,
@@ -347,6 +347,25 @@ VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
 }
 
 unsigned
+VectorTargetTransformImpl::getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
+                                                 ArrayRef<Type*> Tys) const {
+  // assume that we need to scalarize this intrinsic.
+  unsigned ScalarizationCost = 0;
+  unsigned ScalarCalls = 1;
+  if (RetTy->isVectorTy()) {
+    ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
+    ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+  }
+  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
+    if (Tys[i]->isVectorTy()) {
+      ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
+      ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
+    }
+  }
+  return ScalarCalls + ScalarizationCost;
+}
+
+unsigned
 VectorTargetTransformImpl::getNumberOfParts(Type *Tp) const {
   std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Tp);
   return LT.first;
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
index 66ad353709..b12399d447 100644
--- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -8,12 +8,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/X86BaseInfo.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/MC/MCTargetAsmLexer.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/SmallVector.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 79f7c00960..fdb7583ed0 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -8,21 +8,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "MCTargetDesc/X86BaseInfo.h"
-#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetAsmParser.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index f13692739a..ed61c01130 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -16,11 +16,10 @@
 
 #include "X86Disassembler.h"
 #include "X86DisassemblerDecoder.h"
-
 #include "llvm/MC/EDInstInfo.h"
-#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index a4bd1147bc..e357710b20 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -14,12 +14,12 @@
 
 #define DEBUG_TYPE "asm-printer"
 #include "X86ATTInstPrinter.h"
-#include "X86InstComments.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
-#include "llvm/MC/MCInst.h"
+#include "X86InstComments.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -131,7 +131,7 @@ void X86ATTInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
                                       raw_ostream &O) {
   const MCOperand &Op = MI->getOperand(OpNo);
   if (Op.isImm())
-    O << Op.getImm();
+    O << formatImm(Op.getImm());
   else {
     assert(Op.isExpr() && "unknown pcrel immediate operand");
     // If a symbolic branch target was added as a constant expression then print
@@ -157,7 +157,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
   } else if (Op.isImm()) {
     // Print X86 immediates as signed values.
     O << markup("<imm:")
-      << '$' << (int64_t)Op.getImm()
+      << '$' << formatImm((int64_t)Op.getImm())
       << markup(">");
     
     if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
@@ -189,7 +189,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
   if (DispSpec.isImm()) {
     int64_t DispVal = DispSpec.getImm();
     if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
-      O << DispVal;
+      O << formatImm(DispVal);
   } else {
     assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
     O << *DispSpec.getExpr();
@@ -207,7 +207,7 @@ void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
       if (ScaleVal != 1) {
         O << ','
 	  << markup("<imm:")
-          << ScaleVal
+          << ScaleVal // never printed in hex.
 	  << markup(">");
       }
     }
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index d67aec7f10..141f4a4dd8 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -14,11 +14,11 @@
 
 #define DEBUG_TYPE "asm-printer"
 #include "X86IntelInstPrinter.h"
-#include "X86InstComments.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
-#include "llvm/MC/MCInst.h"
+#include "X86InstComments.h"
 #include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 287c9f137a..f66b203f0d 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -12,19 +12,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86MCTargetDesc.h"
-#include "X86MCAsmInfo.h"
 #include "InstPrinter/X86ATTInstPrinter.h"
 #include "InstPrinter/X86IntelInstPrinter.h"
-#include "llvm/MC/MachineLocation.h"
+#include "X86MCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCCodeGenInfo.h"
 #include "llvm/MC/MCInstrAnalysis.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Support/Host.h"
+#include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Host.h"
 #include "llvm/Support/TargetRegistry.h"
 
 #define GET_REGINFO_MC_DESC
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 7ff058edbc..64f005c469 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -7,18 +7,18 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "MCTargetDesc/X86FixupKinds.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
-#include "llvm/MC/MCAssembler.h"
+#include "MCTargetDesc/X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCMachObjectWriter.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCValue.h"
-#include "llvm/ADT/Twine.h"
+#include "llvm/Object/MachOFormat.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
-#include "llvm/Object/MachOFormat.h"
 
 using namespace llvm;
 using namespace llvm::object;
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 8ad0bc08ac..e3c22d9c3b 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -182,17 +182,17 @@ def : Proc<"westmere",        [FeatureSSE42, FeatureCMPXCHG16B,
 // Sandy Bridge
 // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,
 // rather than a superset.
-def : Proc<"corei7-avx",      [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
-                               FeatureAES, FeaturePCLMUL]>;
+def : Proc<"corei7-avx",      [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+                               FeaturePOPCNT, FeatureAES, FeaturePCLMUL]>;
 // Ivy Bridge
-def : Proc<"core-avx-i",      [FeatureAVX, FeatureCMPXCHG16B, FeaturePOPCNT,
-                               FeatureAES, FeaturePCLMUL,
+def : Proc<"core-avx-i",      [FeatureAVX, FeatureCMPXCHG16B, FeatureFastUAMem,
+                               FeaturePOPCNT, FeatureAES, FeaturePCLMUL,
                                FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>;
 
 // Haswell
-def : Proc<"core-avx2",       [FeatureAVX2, FeatureCMPXCHG16B, FeaturePOPCNT,
-                               FeatureAES, FeaturePCLMUL, FeatureRDRAND,
-                               FeatureF16C, FeatureFSGSBase,
+def : Proc<"core-avx2",       [FeatureAVX2, FeatureCMPXCHG16B, FeatureFastUAMem,
+                               FeaturePOPCNT, FeatureAES, FeaturePCLMUL,
+                               FeatureRDRAND, FeatureF16C, FeatureFSGSBase,
                                FeatureMOVBE, FeatureLZCNT, FeatureBMI,
                                FeatureBMI2, FeatureFMA,
                                FeatureRTM]>;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 9a63060c90..9cbc4ea649 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,33 +13,33 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86AsmPrinter.h"
+#include "InstPrinter/X86ATTInstPrinter.h"
 #include "X86.h"
 #include "X86COFFMachineModuleInfo.h"
 #include "X86MachineFunctionInfo.h"
 #include "X86TargetMachine.h"
-#include "InstPrinter/X86ATTInstPrinter.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/CodeGen/MachineModuleInfoImpls.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Module.h"
 #include "llvm/Support/COFF.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index a5a8dc18e4..0dfeb42f1a 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -15,8 +15,8 @@
 #define X86COFF_MACHINEMODULEINFO_H
 
 #include "X86MachineFunctionInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 
 namespace llvm {
   class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index ee6408b403..8dbff74e51 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -13,23 +13,23 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "jit"
+#include "X86.h"
 #include "X86InstrInfo.h"
 #include "X86JITInfo.h"
+#include "X86Relocations.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
-#include "X86Relocations.h"
-#include "X86.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/Passes.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 8e415c7d19..3201eb9cc0 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -14,24 +14,24 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86.h"
-#include "X86InstrBuilder.h"
 #include "X86ISelLowering.h"
+#include "X86InstrBuilder.h"
 #include "X86RegisterInfo.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
 #include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
@@ -1645,6 +1645,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
     return X86VisitIntrinsicCall(*II);
 
+  // Allow SelectionDAG isel to handle tail calls.
+  if (cast<CallInst>(I)->isTailCall())
+    return false;
+
   return DoSelectCall(I, 0);
 }
 
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 50d0fb5c98..16197e09e9 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -26,17 +26,17 @@
 #define DEBUG_TYPE "x86-codegen"
 #include "X86.h"
 #include "X86InstrInfo.h"
-#include "llvm/InlineAsm.h"
 #include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/EdgeBundles.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/InlineAsm.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 5bfb5054b0..48dc67da38 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -17,18 +17,18 @@
 #include "X86MachineFunctionInfo.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
-#include "llvm/Function.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/Target/TargetOptions.h"
 
 using namespace llvm;
 
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index ca45d005d5..a35b9e0e1f 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -19,21 +19,21 @@
 #include "X86RegisterInfo.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index bf51a6b6ba..e05b43add8 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -14,20 +14,16 @@
 
 #define DEBUG_TYPE "x86-isel"
 #include "X86ISelLowering.h"
+#include "Utils/X86ShuffleDecode.h"
 #include "X86.h"
 #include "X86InstrBuilder.h"
 #include "X86TargetMachine.h"
 #include "X86TargetObjectFile.h"
-#include "Utils/X86ShuffleDecode.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/VariadicFunction.h"
 #include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/GlobalAlias.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/CodeGen/IntrinsicLowering.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -35,14 +31,18 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/VariadicFunction.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -1041,7 +1041,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::SRA,               MVT::v8i16, Custom);
     setOperationAction(ISD::SRA,               MVT::v16i8, Custom);
 
-    if (Subtarget->hasAVX2()) {
+    if (Subtarget->hasInt256()) {
       setOperationAction(ISD::SRL,             MVT::v2i64, Legal);
       setOperationAction(ISD::SRL,             MVT::v4i32, Legal);
 
@@ -1060,7 +1060,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     }
   }
 
-  if (!TM.Options.UseSoftFloat && Subtarget->hasAVX()) {
+  if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
     addRegisterClass(MVT::v32i8,  &X86::VR256RegClass);
     addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
     addRegisterClass(MVT::v8i32,  &X86::VR256RegClass);
@@ -1144,7 +1144,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
       setOperationAction(ISD::FMA,             MVT::f64, Legal);
     }
 
-    if (Subtarget->hasAVX2()) {
+    if (Subtarget->hasInt256()) {
       setOperationAction(ISD::ADD,             MVT::v4i64, Legal);
       setOperationAction(ISD::ADD,             MVT::v8i32, Legal);
       setOperationAction(ISD::ADD,             MVT::v16i16, Legal);
@@ -1410,9 +1410,9 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
          ((DstAlign == 0 || DstAlign >= 16) &&
           (SrcAlign == 0 || SrcAlign >= 16)))) {
       if (Size >= 32) {
-        if (Subtarget->hasAVX2())
+        if (Subtarget->hasInt256())
           return MVT::v8i32;
-        if (Subtarget->hasAVX())
+        if (Subtarget->hasFp256())
           return MVT::v8f32;
       }
       if (Subtarget->hasSSE2())
@@ -1432,6 +1432,13 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size,
   return MVT::i32;
 }
 
+bool
+X86TargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
+  if (Fast)
+    *Fast = Subtarget->isUnalignedMemAccessFast();
+  return true;
+}
+
 /// getJumpTableEncoding - Return the entry encoding for a jump table in the
 /// current function.  The returned value is a member of the
 /// MachineJumpTableInfo::JTEntryKind enum.
@@ -3284,9 +3291,7 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) {
 /// isUndefOrEqual - Val is either less than zero (undef) or equal to the
 /// specified value.
 static bool isUndefOrEqual(int Val, int CmpVal) {
-  if (Val < 0 || Val == CmpVal)
-    return true;
-  return false;
+  return (Val < 0 || Val == CmpVal);
 }
 
 /// isSequentialOrUndefInRange - Return true if every element in Mask, beginning
@@ -3313,8 +3318,8 @@ static bool isPSHUFDMask(ArrayRef<int> Mask, EVT VT) {
 
 /// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PSHUFHW.
-static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
-  if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16))
+static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+  if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
     return false;
 
   // Lower quadword copied in order or undef.
@@ -3342,8 +3347,8 @@ static bool isPSHUFHWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
 
 /// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
 /// is suitable for input to PSHUFLW.
-static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
-  if (VT != MVT::v8i16 && (!HasAVX2 || VT != MVT::v16i16))
+static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
+  if (VT != MVT::v8i16 && (!HasInt256 || VT != MVT::v16i16))
     return false;
 
   // Upper quadword copied in order.
@@ -3374,7 +3379,7 @@ static bool isPSHUFLWMask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
 static bool isPALIGNRMask(ArrayRef<int> Mask, EVT VT,
                           const X86Subtarget *Subtarget) {
   if ((VT.getSizeInBits() == 128 && !Subtarget->hasSSSE3()) ||
-      (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()))
+      (VT.getSizeInBits() == 256 && !Subtarget->hasInt256()))
     return false;
 
   unsigned NumElts = VT.getVectorNumElements();
@@ -3461,9 +3466,9 @@ static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask,
 /// specifies a shuffle of elements that is suitable for input to 128/256-bit
 /// SHUFPS and SHUFPD. If Commuted is true, then it checks for sources to be
 /// reverse of what x86 shuffles want.
-static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX,
+static bool isSHUFPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256,
                         bool Commuted = false) {
-  if (!HasAVX && VT.getSizeInBits() == 256)
+  if (!HasFp256 && VT.getSizeInBits() == 256)
     return false;
 
   unsigned NumElems = VT.getVectorNumElements();
@@ -3642,14 +3647,14 @@ SDValue Compact8x32ShuffleNode(ShuffleVectorSDNode *SVOp,
 /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to UNPCKL.
 static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
-                         bool HasAVX2, bool V2IsSplat = false) {
+                         bool HasInt256, bool V2IsSplat = false) {
   unsigned NumElts = VT.getVectorNumElements();
 
   assert((VT.is128BitVector() || VT.is256BitVector()) &&
          "Unsupported vector type for unpckh");
 
   if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
-      (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+      (!HasInt256 || (NumElts != 16 && NumElts != 32)))
     return false;
 
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3681,14 +3686,14 @@ static bool isUNPCKLMask(ArrayRef<int> Mask, EVT VT,
 /// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to UNPCKH.
 static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
-                         bool HasAVX2, bool V2IsSplat = false) {
+                         bool HasInt256, bool V2IsSplat = false) {
   unsigned NumElts = VT.getVectorNumElements();
 
   assert((VT.is128BitVector() || VT.is256BitVector()) &&
          "Unsupported vector type for unpckh");
 
   if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
-      (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+      (!HasInt256 || (NumElts != 16 && NumElts != 32)))
     return false;
 
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3719,14 +3724,14 @@ static bool isUNPCKHMask(ArrayRef<int> Mask, EVT VT,
 /// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
 /// <0, 0, 1, 1>
 static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
-                                  bool HasAVX2) {
+                                  bool HasInt256) {
   unsigned NumElts = VT.getVectorNumElements();
 
   assert((VT.is128BitVector() || VT.is256BitVector()) &&
          "Unsupported vector type for unpckh");
 
   if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
-      (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+      (!HasInt256 || (NumElts != 16 && NumElts != 32)))
     return false;
 
   // For 256-bit i64/f64, use MOVDDUPY instead, so reject the matching pattern
@@ -3761,14 +3766,14 @@ static bool isUNPCKL_v_undef_Mask(ArrayRef<int> Mask, EVT VT,
 /// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
 /// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
 /// <2, 2, 3, 3>
-static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX2) {
+static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, EVT VT, bool HasInt256) {
   unsigned NumElts = VT.getVectorNumElements();
 
   assert((VT.is128BitVector() || VT.is256BitVector()) &&
          "Unsupported vector type for unpckh");
 
   if (VT.getSizeInBits() == 256 && NumElts != 4 && NumElts != 8 &&
-      (!HasAVX2 || (NumElts != 16 && NumElts != 32)))
+      (!HasInt256 || (NumElts != 16 && NumElts != 32)))
     return false;
 
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
@@ -3817,8 +3822,8 @@ static bool isMOVLMask(ArrayRef<int> Mask, EVT VT) {
 ///   vector_shuffle <4, 5, 6, 7, 12, 13, 14, 15>
 /// The first half comes from the second half of V1 and the second half from the
 /// the second half of V2.
-static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
-  if (!HasAVX || !VT.is256BitVector())
+static bool isVPERM2X128Mask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+  if (!HasFp256 || !VT.is256BitVector())
     return false;
 
   // The shuffle result is divided into half A and half B. In total the two
@@ -3877,8 +3882,8 @@ static unsigned getShuffleVPERM2X128Immediate(ShuffleVectorSDNode *SVOp) {
 /// to the same elements of the low, but to the higher half of the source.
 /// In VPERMILPD the two lanes could be shuffled independently of each other
 /// with the same restriction that lanes can't be crossed. Also handles PSHUFDY.
-static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
-  if (!HasAVX)
+static bool isVPERMILPMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+  if (!HasFp256)
     return false;
 
   unsigned NumElts = VT.getVectorNumElements();
@@ -3978,8 +3983,8 @@ static bool isMOVSLDUPMask(ArrayRef<int> Mask, EVT VT,
 /// isMOVDDUPYMask - Return true if the specified VECTOR_SHUFFLE operand
 /// specifies a shuffle of elements that is suitable for input to 256-bit
 /// version of MOVDDUP.
-static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasAVX) {
-  if (!HasAVX || !VT.is256BitVector())
+static bool isMOVDDUPYMask(ArrayRef<int> Mask, EVT VT, bool HasFp256) {
+  if (!HasFp256 || !VT.is256BitVector())
     return false;
 
   unsigned NumElts = VT.getVectorNumElements();
@@ -4384,7 +4389,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
     }
   } else if (Size == 256) { // AVX
-    if (Subtarget->hasAVX2()) { // AVX2
+    if (Subtarget->hasInt256()) { // AVX2
       SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
@@ -4405,7 +4410,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
 /// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
 /// no AVX2 supprt, use two <4 x i32> inserted in a <8 x i32> appropriately.
 /// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG,
+static SDValue getOnesVector(EVT VT, bool HasInt256, SelectionDAG &DAG,
                              DebugLoc dl) {
   assert(VT.isVector() && "Expected a vector type");
   unsigned Size = VT.getSizeInBits();
@@ -4413,7 +4418,7 @@ static SDValue getOnesVector(EVT VT, bool HasAVX2, SelectionDAG &DAG,
   SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
   SDValue Vec;
   if (Size == 256) {
-    if (HasAVX2) { // AVX2
+    if (HasInt256) { // AVX2
       SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
       Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
     } else { // AVX
@@ -5114,7 +5119,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
 /// or SDValue() otherwise.
 SDValue
 X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
-  if (!Subtarget->hasAVX())
+  if (!Subtarget->hasFp256())
     return SDValue();
 
   EVT VT = Op.getValueType();
@@ -5160,7 +5165,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
       if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
           Sc.getOpcode() != ISD::BUILD_VECTOR) {
 
-        if (!Subtarget->hasAVX2())
+        if (!Subtarget->hasInt256())
           return SDValue();
 
         // Use the register form of the broadcast instruction available on AVX2.
@@ -5187,7 +5192,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
   // Handle the broadcasting a single constant scalar from the constant pool
   // into a vector. On Sandybridge it is still better to load a constant vector
   // from the constant pool and not to broadcast it from a scalar.
-  if (ConstSplatVal && Subtarget->hasAVX2()) {
+  if (ConstSplatVal && Subtarget->hasInt256()) {
     EVT CVT = Ld.getValueType();
     assert(!CVT.isVector() && "Must not broadcast a vector type");
     unsigned ScalarSize = CVT.getSizeInBits();
@@ -5215,7 +5220,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
   unsigned ScalarSize = Ld.getValueType().getSizeInBits();
 
   // Handle AVX2 in-register broadcasts.
-  if (!IsLoad && Subtarget->hasAVX2() &&
+  if (!IsLoad && Subtarget->hasInt256() &&
       (ScalarSize == 32 || (Is256 && ScalarSize == 64)))
     return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
 
@@ -5228,7 +5233,7 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
 
   // The integer check is needed for the 64-bit into 128-bit so it doesn't match
   // double since there is no vbroadcastsd xmm
-  if (Subtarget->hasAVX2() && Ld.getValueType().isInteger()) {
+  if (Subtarget->hasInt256() && Ld.getValueType().isInteger()) {
     if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
       return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
   }
@@ -5333,10 +5338,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
   // vpcmpeqd on 256-bit vectors.
   if (ISD::isBuildVectorAllOnes(Op.getNode())) {
-    if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasAVX2()))
+    if (VT == MVT::v4i32 || (VT == MVT::v8i32 && Subtarget->hasInt256()))
       return Op;
 
-    return getOnesVector(VT, Subtarget->hasAVX2(), DAG, dl);
+    return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl);
   }
 
   SDValue Broadcast = LowerVectorBroadcast(Op, DAG);
@@ -5673,64 +5678,53 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
   SDValue V1 = SVOp->getOperand(0);
   SDValue V2 = SVOp->getOperand(1);
   DebugLoc dl = SVOp->getDebugLoc();
-  MVT VT = SVOp->getValueType(0).getSimpleVT();
+  EVT VT = SVOp->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
   unsigned NumElems = VT.getVectorNumElements();
 
-  if (!Subtarget->hasSSE41())
+  if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
+    return SDValue();
+  if (!Subtarget->hasInt256() && VT == MVT::v16i16)
     return SDValue();
 
-  unsigned ISDNo = 0;
-  MVT OpTy;
-
-  switch (VT.SimpleTy) {
-  default: return SDValue();
-  case MVT::v8i16:
-    ISDNo = X86ISD::BLENDPW;
-    OpTy = MVT::v8i16;
-    break;
-  case MVT::v4i32:
-  case MVT::v4f32:
-    ISDNo = X86ISD::BLENDPS;
-    OpTy = MVT::v4f32;
-    break;
-  case MVT::v2i64:
-  case MVT::v2f64:
-    ISDNo = X86ISD::BLENDPD;
-    OpTy = MVT::v2f64;
-    break;
-  case MVT::v8i32:
-  case MVT::v8f32:
-    if (!Subtarget->hasAVX())
-      return SDValue();
-    ISDNo = X86ISD::BLENDPS;
-    OpTy = MVT::v8f32;
-    break;
-  case MVT::v4i64:
-  case MVT::v4f64:
-    if (!Subtarget->hasAVX())
-      return SDValue();
-    ISDNo = X86ISD::BLENDPD;
-    OpTy = MVT::v4f64;
-    break;
-  }
-  assert(ISDNo && "Invalid Op Number");
+  // Check the mask for BLEND and build the value.
+  unsigned MaskValue = 0;
+  // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
+  unsigned NumLanes = (NumElems-1)/8 + 1; 
+  unsigned NumElemsInLane = NumElems / NumLanes;
 
-  unsigned MaskVals = 0;
+  // Blend for v16i16 should be symetric for the both lanes.
+  for (unsigned i = 0; i < NumElemsInLane; ++i) {
 
-  for (unsigned i = 0; i != NumElems; ++i) {
+    int SndLaneEltIdx = (NumLanes == 2) ? 
+      SVOp->getMaskElt(i + NumElemsInLane) : -1;
     int EltIdx = SVOp->getMaskElt(i);
-    if (EltIdx == (int)i || EltIdx < 0)
-      MaskVals |= (1<<i);
-    else if (EltIdx == (int)(i + NumElems))
-      continue; // Bit is set to zero;
-    else
+
+    if ((EltIdx == -1 || EltIdx == (int)i) && 
+        (SndLaneEltIdx == -1 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
+      continue;
+
+    if (((unsigned)EltIdx == (i + NumElems)) && 
+        (SndLaneEltIdx == -1 || 
+         (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
+      MaskValue |= (1<<i);
+    else 
       return SDValue();
   }
 
-  V1 = DAG.getNode(ISD::BITCAST, dl, OpTy, V1);
-  V2 = DAG.getNode(ISD::BITCAST, dl, OpTy, V2);
-  SDValue Ret =  DAG.getNode(ISDNo, dl, OpTy, V1, V2,
-                             DAG.getConstant(MaskVals, MVT::i32));
+  // Convert i32 vectors to floating point if it is not AVX2.
+  // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
+  EVT BlendVT = VT;
+  if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
+    BlendVT = EVT::getVectorVT(*DAG.getContext(), 
+                              EVT::getFloatingPointVT(EltVT.getSizeInBits()), 
+                              NumElems);
+    V1 = DAG.getNode(ISD::BITCAST, dl, VT, V1);
+    V2 = DAG.getNode(ISD::BITCAST, dl, VT, V2);
+  }
+  
+  SDValue Ret =  DAG.getNode(X86ISD::BLENDI, dl, BlendVT, V1, V2,
+                             DAG.getConstant(MaskValue, MVT::i32));
   return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
 }
 
@@ -6130,7 +6124,7 @@ SDValue LowerVECTOR_SHUFFLEv32i8(ShuffleVectorSDNode *SVOp,
   // (1) one of input vector is undefined or zeroinitializer.
   // The mask value 0x80 puts 0 in the corresponding slot of the vector.
   // And (2) the mask indexes don't cross the 128-bit lane.
-  if (VT != MVT::v32i8 || !Subtarget->hasAVX2() ||
+  if (VT != MVT::v32i8 || !Subtarget->hasInt256() ||
       (!V2IsUndef && !V2IsAllZero && !V1IsAllZero))
     return SDValue();
 
@@ -6503,23 +6497,6 @@ static bool MayFoldVectorLoad(SDValue V) {
   return MayFoldLoad(V);
 }
 
-// FIXME: the version above should always be used. Since there's
-// a bug where several vector shuffles can't be folded because the
-// DAG is not updated during lowering and a node claims to have two
-// uses while it only has one, use this version, and let isel match
-// another instruction if the load really happens to have more than
-// one use. Remove this version after this bug get fixed.
-// rdar://8434668, PR8156
-static bool RelaxedMayFoldVectorLoad(SDValue V) {
-  if (V.hasOneUse() && V.getOpcode() == ISD::BITCAST)
-    V = V.getOperand(0);
-  if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
-    V = V.getOperand(0);
-  if (ISD::isNormalLoad(V.getNode()))
-    return true;
-  return false;
-}
-
 static
 SDValue getMOVDDup(SDValue &Op, DebugLoc &dl, SDValue V1, SelectionDAG &DAG) {
   EVT VT = Op.getValueType();
@@ -6633,7 +6610,7 @@ X86TargetLowering::lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
 
   // Only AVX2 support 256-bit vector integer extending.
-  if (!Subtarget->hasAVX2() && VT.is256BitVector())
+  if (!Subtarget->hasInt256() && VT.is256BitVector())
     return SDValue();
 
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -6779,8 +6756,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   bool V1IsSplat = false;
   bool V2IsSplat = false;
   bool HasSSE2 = Subtarget->hasSSE2();
-  bool HasAVX    = Subtarget->hasAVX();
-  bool HasAVX2   = Subtarget->hasAVX2();
+  bool HasFp256    = Subtarget->hasFp256();
+  bool HasInt256   = Subtarget->hasInt256();
   MachineFunction &MF = DAG.getMachineFunction();
   bool OptForSize = MF.getFunction()->getFnAttributes().
     hasAttribute(Attributes::OptimizeForSize);
@@ -6817,20 +6794,20 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
 
   // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
   // unpckh_undef). Only use pshufd if speed is more important than size.
-  if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasAVX2))
+  if (OptForSize && isUNPCKL_v_undef_Mask(M, VT, HasInt256))
     return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
-  if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasAVX2))
+  if (OptForSize && isUNPCKH_v_undef_Mask(M, VT, HasInt256))
     return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
 
   if (isMOVDDUPMask(M, VT) && Subtarget->hasSSE3() &&
-      V2IsUndef && RelaxedMayFoldVectorLoad(V1))
+      V2IsUndef && MayFoldVectorLoad(V1))
     return getMOVDDup(Op, dl, V1, DAG);
 
   if (isMOVHLPS_v_undef_Mask(M, VT))
     return getMOVHighToLow(Op, dl, DAG);
 
   // Use to match splats
-  if (HasSSE2 && isUNPCKHMask(M, VT, HasAVX2) && V2IsUndef &&
+  if (HasSSE2 && isUNPCKHMask(M, VT, HasInt256) && V2IsUndef &&
       (VT == MVT::v2f64 || VT == MVT::v2i64))
     return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
 
@@ -6843,12 +6820,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
 
     unsigned TargetMask = getShuffleSHUFImmediate(SVOp);
 
-    if (HasAVX && (VT == MVT::v4f32 || VT == MVT::v2f64))
-      return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask, DAG);
-
     if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
       return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
 
+    if (HasFp256 && (VT == MVT::v4f32 || VT == MVT::v2f64))
+      return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1, TargetMask,
+                                  DAG);
+
     return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V1,
                                 TargetMask, DAG);
   }
@@ -6879,7 +6857,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   }
 
   // FIXME: fold these into legal mask.
-  if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasAVX2))
+  if (isMOVLHPSMask(M, VT) && !isUNPCKLMask(M, VT, HasInt256))
     return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
 
   if (isMOVHLPSMask(M, VT))
@@ -6929,10 +6907,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
     return getMOVL(DAG, dl, VT, V2, V1);
   }
 
-  if (isUNPCKLMask(M, VT, HasAVX2))
+  if (isUNPCKLMask(M, VT, HasInt256))
     return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
 
-  if (isUNPCKHMask(M, VT, HasAVX2))
+  if (isUNPCKHMask(M, VT, HasInt256))
     return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
 
   if (V2IsSplat) {
@@ -6941,9 +6919,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
     // new vector_shuffle with the corrected mask.p
     SmallVector<int, 8> NewMask(M.begin(), M.end());
     NormalizeMask(NewMask, NumElems);
-    if (isUNPCKLMask(NewMask, VT, HasAVX2, true))
+    if (isUNPCKLMask(NewMask, VT, HasInt256, true))
       return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
-    if (isUNPCKHMask(NewMask, VT, HasAVX2, true))
+    if (isUNPCKHMask(NewMask, VT, HasInt256, true))
       return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
   }
 
@@ -6955,15 +6933,15 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
     std::swap(V1IsSplat, V2IsSplat);
     Commuted = false;
 
-    if (isUNPCKLMask(M, VT, HasAVX2))
+    if (isUNPCKLMask(M, VT, HasInt256))
       return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V2, DAG);
 
-    if (isUNPCKHMask(M, VT, HasAVX2))
+    if (isUNPCKHMask(M, VT, HasInt256))
       return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V2, DAG);
   }
 
   // Normalize the node to match x86 shuffle ops if needed
-  if (!V2IsUndef && (isSHUFPMask(M, VT, HasAVX, /* Commuted */ true)))
+  if (!V2IsUndef && (isSHUFPMask(M, VT, HasFp256, /* Commuted */ true)))
     return CommuteVectorShuffle(SVOp, DAG);
 
   // The checks below are all present in isShuffleMaskLegal, but they are
@@ -6981,23 +6959,23 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
       return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
   }
 
-  if (isPSHUFHWMask(M, VT, HasAVX2))
+  if (isPSHUFHWMask(M, VT, HasInt256))
     return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
                                 getShufflePSHUFHWImmediate(SVOp),
                                 DAG);
 
-  if (isPSHUFLWMask(M, VT, HasAVX2))
+  if (isPSHUFLWMask(M, VT, HasInt256))
     return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
                                 getShufflePSHUFLWImmediate(SVOp),
                                 DAG);
 
-  if (isSHUFPMask(M, VT, HasAVX))
+  if (isSHUFPMask(M, VT, HasFp256))
     return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2,
                                 getShuffleSHUFImmediate(SVOp), DAG);
 
-  if (isUNPCKL_v_undef_Mask(M, VT, HasAVX2))
+  if (isUNPCKL_v_undef_Mask(M, VT, HasInt256))
     return getTargetShuffleNode(X86ISD::UNPCKL, dl, VT, V1, V1, DAG);
-  if (isUNPCKH_v_undef_Mask(M, VT, HasAVX2))
+  if (isUNPCKH_v_undef_Mask(M, VT, HasInt256))
     return getTargetShuffleNode(X86ISD::UNPCKH, dl, VT, V1, V1, DAG);
 
   //===--------------------------------------------------------------------===//
@@ -7006,12 +6984,12 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   //
 
   // Handle VMOVDDUPY permutations
-  if (V2IsUndef && isMOVDDUPYMask(M, VT, HasAVX))
+  if (V2IsUndef && isMOVDDUPYMask(M, VT, HasFp256))
     return getTargetShuffleNode(X86ISD::MOVDDUP, dl, VT, V1, DAG);
 
   // Handle VPERMILPS/D* permutations
-  if (isVPERMILPMask(M, VT, HasAVX)) {
-    if (HasAVX2 && VT == MVT::v8i32)
+  if (isVPERMILPMask(M, VT, HasFp256)) {
+    if (HasInt256 && VT == MVT::v8i32)
       return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1,
                                   getShuffleSHUFImmediate(SVOp), DAG);
     return getTargetShuffleNode(X86ISD::VPERMILP, dl, VT, V1,
@@ -7019,7 +6997,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   }
 
   // Handle VPERM2F128/VPERM2I128 permutations
-  if (isVPERM2X128Mask(M, VT, HasAVX))
+  if (isVPERM2X128Mask(M, VT, HasFp256))
     return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
                                 V2, getShuffleVPERM2X128Immediate(SVOp), DAG);
 
@@ -7027,7 +7005,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
   if (BlendOp.getNode())
     return BlendOp;
 
-  if (V2IsUndef && HasAVX2 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
+  if (V2IsUndef && HasInt256 && (VT == MVT::v8i32 || VT == MVT::v8f32)) {
     SmallVector<SDValue, 8> permclMask;
     for (unsigned i = 0; i != 8; ++i) {
       permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MVT::i32));
@@ -7039,7 +7017,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
                        DAG.getNode(ISD::BITCAST, dl, VT, Mask), V1);
   }
 
-  if (V2IsUndef && HasAVX2 && (VT == MVT::v4i64 || VT == MVT::v4f64))
+  if (V2IsUndef && HasInt256 && (VT == MVT::v4i64 || VT == MVT::v4f64))
     return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1,
                                 getShuffleCLImmediate(SVOp), DAG);
 
@@ -7374,7 +7352,7 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
 // upper bits of a vector.
 static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
                                       SelectionDAG &DAG) {
-  if (Subtarget->hasAVX()) {
+  if (Subtarget->hasFp256()) {
     DebugLoc dl = Op.getNode()->getDebugLoc();
     SDValue Vec = Op.getNode()->getOperand(0);
     SDValue Idx = Op.getNode()->getOperand(1);
@@ -7394,7 +7372,7 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
 // the upper bits of a vector.
 static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget,
                                      SelectionDAG &DAG) {
-  if (Subtarget->hasAVX()) {
+  if (Subtarget->hasFp256()) {
     DebugLoc dl = Op.getNode()->getDebugLoc();
     SDValue Vec = Op.getNode()->getOperand(0);
     SDValue SubVec = Op.getNode()->getOperand(1);
@@ -8413,10 +8391,10 @@ SDValue X86TargetLowering::lowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const
       VT.getVectorNumElements() != SVT.getVectorNumElements())
     return SDValue();
 
-  assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+  assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!");
 
   // AVX2 has better support of integer extending.
-  if (Subtarget->hasAVX2())
+  if (Subtarget->hasInt256())
     return DAG.getNode(X86ISD::VZEXT, DL, VT, In);
 
   SDValue Lo = DAG.getNode(X86ISD::VZEXT, DL, MVT::v4i32, In);
@@ -8436,7 +8414,7 @@ SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
       VT.getVectorNumElements() != SVT.getVectorNumElements())
     return SDValue();
 
-  assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+  assert(Subtarget->hasFp256() && "256-bit vector is observed without AVX!");
 
   unsigned NumElems = VT.getVectorNumElements();
   EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
@@ -9025,6 +9003,11 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
   return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
 }
 
+static bool isAllOnes(SDValue V) {
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+  return C && C->isAllOnesValue();
+}
+
 /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
 /// if it's possible.
 SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
@@ -9077,6 +9060,14 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
   }
 
   if (LHS.getNode()) {
+    // If the LHS is of the form (x ^ -1) then replace the LHS with x and flip
+    // the condition code later.
+    bool Invert = false;
+    if (LHS.getOpcode() == ISD::XOR && isAllOnes(LHS.getOperand(1))) {
+      Invert = true;
+      LHS = LHS.getOperand(0);
+    }
+
     // If LHS is i8, promote it to i32 with any_extend.  There is no i8 BT
     // instruction.  Since the shift amount is in-range-or-undefined, we know
     // that doing a bittest on the i32 value is ok.  We extend to i32 because
@@ -9092,7 +9083,10 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
       RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
 
     SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
-    unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+    X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+    // Flip the condition if the LHS was a not instruction
+    if (Invert)
+      Cond = X86::GetOppositeBranchCondition(Cond);
     return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                        DAG.getConstant(Cond, MVT::i8), BT);
   }
@@ -9263,7 +9257,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
   }
 
   // Break 256-bit integer vector compare into smaller ones.
-  if (VT.is256BitVector() && !Subtarget->hasAVX2())
+  if (VT.is256BitVector() && !Subtarget->hasInt256())
     return Lower256IntVSETCC(Op, DAG);
 
   // We are handling one of the integer comparisons here.  Since SSE only has
@@ -9350,11 +9344,6 @@ static bool isZero(SDValue V) {
   return C && C->isNullValue();
 }
 
-static bool isAllOnes(SDValue V) {
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
-  return C && C->isAllOnesValue();
-}
-
 static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
   if (V.getOpcode() != ISD::TRUNCATE)
     return false;
@@ -10917,7 +10906,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
 
       // Check that ECX wasn't needed by an 'inreg' parameter.
       FunctionType *FTy = Func->getFunctionType();
-      const AttrListPtr &Attrs = Func->getAttributes();
+      const AttributeSet &Attrs = Func->getAttributes();
 
       if (!Attrs.isEmpty() && !Func->isVarArg()) {
         unsigned InRegCount = 0;
@@ -11180,7 +11169,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
   EVT VT = Op.getValueType();
 
   // Decompose 256-bit ops into smaller 128-bit ops.
-  if (VT.is256BitVector() && !Subtarget->hasAVX2())
+  if (VT.is256BitVector() && !Subtarget->hasInt256())
     return Lower256IntArith(Op, DAG);
 
   assert((VT == MVT::v2i64 || VT == MVT::v4i64) &&
@@ -11243,7 +11232,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
       uint64_t ShiftAmt = C->getZExtValue();
 
       if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
-          (Subtarget->hasAVX2() &&
+          (Subtarget->hasInt256() &&
            (VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
         if (Op.getOpcode() == ISD::SHL)
           return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
@@ -11300,7 +11289,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
         llvm_unreachable("Unknown shift opcode.");
       }
 
-      if (Subtarget->hasAVX2() && VT == MVT::v32i8) {
+      if (Subtarget->hasInt256() && VT == MVT::v32i8) {
         if (Op.getOpcode() == ISD::SHL) {
           // Make a large shift.
           SDValue SHL = DAG.getNode(X86ISD::VSHLI, dl, MVT::v16i16, R,
@@ -11543,9 +11532,9 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
     default: return SDValue();
     case MVT::v8i32:
     case MVT::v16i16:
-      if (!Subtarget->hasAVX())
+      if (!Subtarget->hasFp256())
         return SDValue();
-      if (!Subtarget->hasAVX2()) {
+      if (!Subtarget->hasInt256()) {
         // needs to be split
         unsigned NumElems = VT.getVectorNumElements();
 
@@ -12154,9 +12143,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::ANDNP:              return "X86ISD::ANDNP";
   case X86ISD::PSIGN:              return "X86ISD::PSIGN";
   case X86ISD::BLENDV:             return "X86ISD::BLENDV";
-  case X86ISD::BLENDPW:            return "X86ISD::BLENDPW";
-  case X86ISD::BLENDPS:            return "X86ISD::BLENDPS";
-  case X86ISD::BLENDPD:            return "X86ISD::BLENDPD";
+  case X86ISD::BLENDI:             return "X86ISD::BLENDI";
   case X86ISD::HADD:               return "X86ISD::HADD";
   case X86ISD::HSUB:               return "X86ISD::HSUB";
   case X86ISD::FHADD:              return "X86ISD::FHADD";
@@ -12358,6 +12345,30 @@ bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
   return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
 }
 
+bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+  EVT VT1 = Val.getValueType();
+  if (isZExtFree(VT1, VT2))
+    return true;
+
+  if (Val.getOpcode() != ISD::LOAD)
+    return false;
+
+  if (!VT1.isSimple() || !VT1.isInteger() ||
+      !VT2.isSimple() || !VT2.isInteger())
+    return false;
+
+  switch (VT1.getSimpleVT().SimpleTy) {
+  default: break;
+  case MVT::i8:
+  case MVT::i16:
+  case MVT::i32:
+    // X86 has 8, 16, and 32-bit zero-extending loads.
+    return true;
+  }
+
+  return false;
+}
+
 bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
   // i16 instructions are longer (0x66 prefix) and potentially slower.
   return !(VT1 == MVT::i32 && VT2 == MVT::i16);
@@ -12378,15 +12389,15 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
   return (VT.getVectorNumElements() == 2 ||
           ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
           isMOVLMask(M, VT) ||
-          isSHUFPMask(M, VT, Subtarget->hasAVX()) ||
+          isSHUFPMask(M, VT, Subtarget->hasFp256()) ||
           isPSHUFDMask(M, VT) ||
-          isPSHUFHWMask(M, VT, Subtarget->hasAVX2()) ||
-          isPSHUFLWMask(M, VT, Subtarget->hasAVX2()) ||
+          isPSHUFHWMask(M, VT, Subtarget->hasInt256()) ||
+          isPSHUFLWMask(M, VT, Subtarget->hasInt256()) ||
           isPALIGNRMask(M, VT, Subtarget) ||
-          isUNPCKLMask(M, VT, Subtarget->hasAVX2()) ||
-          isUNPCKHMask(M, VT, Subtarget->hasAVX2()) ||
-          isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasAVX2()) ||
-          isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasAVX2()));
+          isUNPCKLMask(M, VT, Subtarget->hasInt256()) ||
+          isUNPCKHMask(M, VT, Subtarget->hasInt256()) ||
+          isUNPCKL_v_undef_Mask(M, VT, Subtarget->hasInt256()) ||
+          isUNPCKH_v_undef_Mask(M, VT, Subtarget->hasInt256()));
 }
 
 bool
@@ -12399,8 +12410,8 @@ X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
   if (NumElts == 4 && VT.is128BitVector()) {
     return (isMOVLMask(Mask, VT)  ||
             isCommutedMOVLMask(Mask, VT, true) ||
-            isSHUFPMask(Mask, VT, Subtarget->hasAVX()) ||
-            isSHUFPMask(Mask, VT, Subtarget->hasAVX(), /* Commuted */ true));
+            isSHUFPMask(Mask, VT, Subtarget->hasFp256()) ||
+            isSHUFPMask(Mask, VT, Subtarget->hasFp256(), /* Commuted */ true));
   }
   return false;
 }
@@ -13470,7 +13481,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
     MBB->addSuccessor(EndMBB);
   }
 
-  unsigned MOVOpc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+  unsigned MOVOpc = Subtarget->hasFp256() ? X86::VMOVAPSmr : X86::MOVAPSmr;
   // In the XMM save block, save all the XMM argument registers.
   for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
     int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
@@ -14515,7 +14526,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   // Combine 256-bit vector shuffles. This is only profitable when in AVX mode
-  if (Subtarget->hasAVX() && VT.is256BitVector() &&
+  if (Subtarget->hasFp256() && VT.is256BitVector() &&
       N->getOpcode() == ISD::VECTOR_SHUFFLE)
     return PerformShuffleCombine256(N, DAG, DCI, Subtarget);
 
@@ -14543,7 +14554,7 @@ static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
   if (!DCI.isBeforeLegalizeOps())
     return SDValue();
 
-  if (!Subtarget->hasAVX())
+  if (!Subtarget->hasFp256())
     return SDValue();
 
   EVT VT = N->getValueType(0);
@@ -14553,7 +14564,7 @@ static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
 
   if ((VT == MVT::v4i32) && (OpVT == MVT::v4i64)) {
 
-    if (Subtarget->hasAVX2()) {
+    if (Subtarget->hasInt256()) {
       // AVX2: v4i64 -> v4i32
 
       // VPERMD
@@ -14592,7 +14603,7 @@ static SDValue PerformTruncateCombine(SDNode *N, SelectionDAG &DAG,
 
   if ((VT == MVT::v8i16) && (OpVT == MVT::v8i32)) {
 
-    if (Subtarget->hasAVX2()) {
+    if (Subtarget->hasInt256()) {
       // AVX2: v8i32 -> v8i16
 
       Op = DAG.getNode(ISD::BITCAST, dl, MVT::v32i8, Op);
@@ -15552,7 +15563,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
     return SDValue();
 
   if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
-      (!Subtarget->hasAVX2() ||
+      (!Subtarget->hasInt256() ||
        (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16)))
     return SDValue();
 
@@ -15861,7 +15872,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
   // look for psign/blend
   if (VT == MVT::v2i64 || VT == MVT::v4i64) {
     if (!Subtarget->hasSSSE3() ||
-        (VT == MVT::v4i64 && !Subtarget->hasAVX2()))
+        (VT == MVT::v4i64 && !Subtarget->hasInt256()))
       return SDValue();
 
     // Canonicalize pandn to RHS
@@ -15907,6 +15918,11 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
 
       DebugLoc DL = N->getDebugLoc();
 
+      // We are going to replace the AND, OR, NAND with either BLEND
+      // or PSIGN, which only look at the MSB. The VSRAI instruction
+      // does not affect the highest bit, so we can get rid of it.
+      Mask = Mask.getOperand(0);
+
       // Now we know we at least have a plendvb with the mask val.  See if
       // we can form a psignb/w/d.
       // psign = x.type == y.type == mask.type && y = sub(0, x);
@@ -15915,7 +15931,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
           X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
         assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
                "Unsupported VT for PSIGN");
-        Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
+        Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask);
         return DAG.getNode(ISD::BITCAST, DL, VT, Mask);
       }
       // PBLENDVB only available on SSE 4.1
@@ -16203,7 +16219,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
   // On Sandy Bridge, 256-bit memory operations are executed by two
   // 128-bit ports. However, on Haswell it is better to issue a single 256-bit
   // memory  operation.
-  if (VT.is256BitVector() && !Subtarget->hasAVX2() &&
+  if (VT.is256BitVector() && !Subtarget->hasInt256() &&
       StoredVal.getNode()->getOpcode() == ISD::CONCAT_VECTORS &&
       StoredVal.getNumOperands() == 2) {
     SDValue Value0 = StoredVal.getOperand(0);
@@ -16551,7 +16567,7 @@ static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
 
   // Try to synthesize horizontal adds from adds of shuffles.
   if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
-       (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+       (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
       isHorizontalBinOp(LHS, RHS, true))
     return DAG.getNode(X86ISD::FHADD, N->getDebugLoc(), VT, LHS, RHS);
   return SDValue();
@@ -16566,7 +16582,7 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG,
 
   // Try to synthesize horizontal subs from subs of shuffles.
   if (((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
-       (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
+       (Subtarget->hasFp256() && (VT == MVT::v8f32 || VT == MVT::v4f64))) &&
       isHorizontalBinOp(LHS, RHS, false))
     return DAG.getNode(X86ISD::FHSUB, N->getDebugLoc(), VT, LHS, RHS);
   return SDValue();
@@ -16661,7 +16677,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
   if (!DCI.isBeforeLegalizeOps())
     return SDValue();
 
-  if (!Subtarget->hasAVX())
+  if (!Subtarget->hasFp256())
     return SDValue();
 
   EVT VT = N->getValueType(0);
@@ -16672,7 +16688,7 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
   if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) ||
       (VT == MVT::v8i32 && OpVT == MVT::v8i16)) {
 
-    if (Subtarget->hasAVX2())
+    if (Subtarget->hasInt256())
       return DAG.getNode(X86ISD::VSEXT_MOVL, dl, VT, Op);
 
     // Optimize vectors in AVX mode
@@ -16792,13 +16808,13 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
   if (!DCI.isBeforeLegalizeOps())
     return SDValue();
 
-  if (!Subtarget->hasAVX())
+  if (!Subtarget->hasFp256())
     return SDValue();
 
   if (((VT == MVT::v8i32) && (OpVT == MVT::v8i16)) ||
       ((VT == MVT::v4i64) && (OpVT == MVT::v4i32)))  {
 
-    if (Subtarget->hasAVX2())
+    if (Subtarget->hasInt256())
       return DAG.getNode(X86ISD::VZEXT_MOVL, dl, VT, N0);
 
     SDValue ZeroVec = getZeroVector(OpVT, Subtarget, DAG, dl);
@@ -17024,7 +17040,7 @@ static SDValue PerformAddCombine(SDNode *N, SelectionDAG &DAG,
 
   // Try to synthesize horizontal adds from adds of shuffles.
   if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
-       (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+       (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
       isHorizontalBinOp(Op0, Op1, true))
     return DAG.getNode(X86ISD::HADD, N->getDebugLoc(), VT, Op0, Op1);
 
@@ -17057,7 +17073,7 @@ static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG,
   // Try to synthesize horizontal adds from adds of shuffles.
   EVT VT = N->getValueType(0);
   if (((Subtarget->hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
-       (Subtarget->hasAVX2() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
+       (Subtarget->hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
       isHorizontalBinOp(Op0, Op1, true))
     return DAG.getNode(X86ISD::HSUB, N->getDebugLoc(), VT, Op0, Op1);
 
@@ -17435,7 +17451,7 @@ TargetLowering::ConstraintWeight
   case 'x':
   case 'Y':
     if (((type->getPrimitiveSizeInBits() == 128) && Subtarget->hasSSE1()) ||
-        ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasAVX()))
+        ((type->getPrimitiveSizeInBits() == 256) && Subtarget->hasFp256()))
       weight = CW_Register;
     break;
   case 'I':
@@ -17912,6 +17928,17 @@ FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
   return -1;
 }
 
+ScalarTargetTransformInfo::PopcntHwSupport
+X86ScalarTargetTransformImpl::getPopcntHwSupport(unsigned TyWidth) const {
+  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+  const X86Subtarget &ST = TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+  // TODO: Currently the __builtin_popcount() implementation using SSE3
+  //   instructions is inefficient. Once the problem is fixed, we should
+  //   call ST.hasSSE3() instead of ST.hasSSE4().
+  return ST.hasSSE41() ? Fast : None;
+}
+
 unsigned
 X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
                                                      Type *Ty) const {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index b6e8960f76..cad471df4a 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -15,15 +15,15 @@
 #ifndef X86ISELLOWERING_H
 #define X86ISELLOWERING_H
 
-#include "X86Subtarget.h"
-#include "X86RegisterInfo.h"
 #include "X86MachineFunctionInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/Target/TargetOptions.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/FastISel.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetTransformImpl.h"
 
 namespace llvm {
   namespace X86ISD {
@@ -176,13 +176,11 @@ namespace llvm {
       /// PSIGN - Copy integer sign.
       PSIGN,
 
-      /// BLENDV - Blend where the selector is an XMM.
+      /// BLENDV - Blend where the selector is a register.
       BLENDV,
 
-      /// BLENDxx - Blend where the selector is an immediate.
-      BLENDPW,
-      BLENDPS,
-      BLENDPD,
+      /// BLENDI - Blend where the selector is an immediate.
+      BLENDI,
 
       /// HADD - Integer horizontal add.
       HADD,
@@ -520,10 +518,9 @@ namespace llvm {
                         MachineFunction &MF) const;
 
     /// allowsUnalignedMemoryAccesses - Returns true if the target allows
-    /// unaligned memory accesses. of the specified type.
-    virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
-      return true;
-    }
+    /// unaligned memory accesses. of the specified type. Returns whether it
+    /// is "fast" by reference in the second argument.
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
 
     /// LowerOperation - Provide custom lowering hooks for some operations.
     ///
@@ -641,6 +638,7 @@ namespace llvm {
     /// result out to 64 bits.
     virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
     virtual bool isZExtFree(EVT VT1, EVT VT2) const;
+    virtual bool isZExtFree(SDValue Val, EVT VT2) const;
 
     /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
     /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
@@ -961,6 +959,14 @@ namespace llvm {
                              const TargetLibraryInfo *libInfo);
   }
 
+  class X86ScalarTargetTransformImpl : public ScalarTargetTransformImpl {
+  public:
+    explicit X86ScalarTargetTransformImpl(const TargetLowering *TL) :
+      ScalarTargetTransformImpl(TL) {};
+
+    virtual PopcntHwSupport getPopcntHwSupport(unsigned TyWidth) const;
+  };
+
   class X86VectorTargetTransformInfo : public VectorTargetTransformImpl {
   public:
     explicit X86VectorTargetTransformInfo(const TargetLowering *TL) :
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 73ba0011df..09ab995166 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -187,9 +187,7 @@ def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
 
 def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
 
-def X86Blendpw   : SDNode<"X86ISD::BLENDPW",   SDTBlend>;
-def X86Blendps   : SDNode<"X86ISD::BLENDPS",   SDTBlend>;
-def X86Blendpd   : SDNode<"X86ISD::BLENDPD",   SDTBlend>;
+def X86Blendi    : SDNode<"X86ISD::BLENDI",   SDTBlend>;
 def X86Fmadd     : SDNode<"X86ISD::FMADD",     SDTFma>;
 def X86Fnmadd    : SDNode<"X86ISD::FNMADD",    SDTFma>;
 def X86Fmsub     : SDNode<"X86ISD::FMSUB",     SDTFma>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 0267fdd860..72d8b5c7fd 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -17,15 +17,15 @@
 #include "X86MachineFunctionInfo.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Support/CommandLine.h"
@@ -1556,16 +1556,19 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
   case X86::MOVUPSrm:
   case X86::MOVAPDrm:
   case X86::MOVDQArm:
+  case X86::MOVDQUrm:
   case X86::VMOVSSrm:
   case X86::VMOVSDrm:
   case X86::VMOVAPSrm:
   case X86::VMOVUPSrm:
   case X86::VMOVAPDrm:
   case X86::VMOVDQArm:
+  case X86::VMOVDQUrm:
   case X86::VMOVAPSYrm:
   case X86::VMOVUPSYrm:
   case X86::VMOVAPDYrm:
   case X86::VMOVDQAYrm:
+  case X86::VMOVDQUYrm:
   case X86::MMX_MOVD64rm:
   case X86::MMX_MOVQ64rm:
   case X86::FsVMOVAPSrm:
@@ -2164,7 +2167,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
     }
     MI->setDesc(get(Opc));
     MI->getOperand(3).setImm(Size-Amt);
-    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+    return TargetInstrInfo::commuteInstruction(MI, NewMI);
   }
   case X86::CMOVB16rr:  case X86::CMOVB32rr:  case X86::CMOVB64rr:
   case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
@@ -2243,7 +2246,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
     // Fallthrough intended.
   }
   default:
-    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
+    return TargetInstrInfo::commuteInstruction(MI, NewMI);
   }
 }
 
@@ -3988,6 +3991,21 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     break;
   }
   default: {
+    if ((LoadMI->getOpcode() == X86::MOVSSrm ||
+         LoadMI->getOpcode() == X86::VMOVSSrm) &&
+        MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
+          > 4)
+      // These instructions only load 32 bits, we can't fold them if the
+      // destination register is wider than 32 bits (4 bytes).
+      return NULL;
+    if ((LoadMI->getOpcode() == X86::MOVSDrm ||
+         LoadMI->getOpcode() == X86::VMOVSDrm) &&
+        MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
+          > 8)
+      // These instructions only load 64 bits, we can't fold them if the
+      // destination register is wider than 64 bits (8 bytes).
+      return NULL;
+
     // Folding a normal load. Just copy the load's address operands.
     unsigned NumOps = LoadMI->getDesc().getNumOperands();
     for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
@@ -4055,7 +4073,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
 
   if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
     return true;
-  return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
+  return TargetInstrInfo::canFoldMemoryOperand(MI, Ops);
 }
 
 bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 229e8b263f..1912a936ce 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3499,13 +3499,13 @@ def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
 def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                     "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
                     VEX, VEX_L;
-}
 def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
                     VEX;
 def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                     "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
                     VEX, VEX_L;
+}
 
 // For Disassembler
 let isCodeGenOnly = 1 in {
@@ -3525,7 +3525,8 @@ def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                         IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
 }
 
-let canFoldAsLoad = 1, mayLoad = 1 in {
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
+    neverHasSideEffects = 1 in {
 def VMOVDQArm  : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>,
                    VEX;
@@ -3542,7 +3543,7 @@ let Predicates = [HasAVX] in {
 }
 }
 
-let mayStore = 1 in {
+let mayStore = 1, neverHasSideEffects = 1 in {
 def VMOVDQAmr  : VPDI<0x7F, MRMDestMem, (outs),
                      (ins i128mem:$dst, VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>,
@@ -3580,7 +3581,8 @@ def MOVDQUrr_REV :   I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                        [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
 }
 
-let canFoldAsLoad = 1, mayLoad = 1 in {
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
+    neverHasSideEffects = 1 in {
 def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                    "movdqa\t{$src, $dst|$dst, $src}",
                    [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/],
@@ -3604,25 +3606,17 @@ def MOVDQUmr :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
                  XS, Requires<[UseSSE2]>;
 }
 
-// Intrinsic forms of MOVDQU load and store
-def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                       "vmovdqu\t{$src, $dst|$dst, $src}",
-                       [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
-                       IIC_SSE_MOVU_P_MR>,
-                     XS, VEX, Requires<[HasAVX]>;
-
-def MOVDQUmr_Int :   I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                       "movdqu\t{$src, $dst|$dst, $src}",
-                       [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)],
-                       IIC_SSE_MOVU_P_MR>,
-                     XS, Requires<[UseSSE2]>;
-
 } // ExeDomain = SSEPackedInt
 
 let Predicates = [HasAVX] in {
+  def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
+            (VMOVDQUmr addr:$dst, VR128:$src)>;
   def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
             (VMOVDQUYmr addr:$dst, VR256:$src)>;
 }
+let Predicates = [UseSSE2] in
+def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src),
+          (MOVDQUmr addr:$dst, VR128:$src)>;
 
 //===---------------------------------------------------------------------===//
 // SSE2 - Packed Integer Arithmetic Instructions
@@ -6891,31 +6885,31 @@ let Predicates = [HasAVX] in {
                             (v4f64 VR256:$src2))),
             (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
 
-  def : Pat<(v8f32 (X86Blendps (v8f32 VR256:$src1), (v8f32 VR256:$src2),
+  def : Pat<(v8f32 (X86Blendi (v8f32 VR256:$src1), (v8f32 VR256:$src2),
                                (imm:$mask))),
-            (VBLENDPSYrri VR256:$src2, VR256:$src1, imm:$mask)>;
-  def : Pat<(v4f64 (X86Blendpd (v4f64 VR256:$src1), (v4f64 VR256:$src2),
+            (VBLENDPSYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+  def : Pat<(v4f64 (X86Blendi (v4f64 VR256:$src1), (v4f64 VR256:$src2),
                                (imm:$mask))),
-            (VBLENDPDYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+            (VBLENDPDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
 
-  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+  def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
                                (imm:$mask))),
-            (VPBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
-  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+            (VPBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
+  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                                (imm:$mask))),
-            (VBLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
-  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+            (VBLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
+  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                                (imm:$mask))),
-            (VBLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+            (VBLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
 }
 
 let Predicates = [HasAVX2] in {
   def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1),
                             (v32i8 VR256:$src2))),
-            (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>;
-  def : Pat<(v16i16 (X86Blendpw (v16i16 VR256:$src1), (v16i16 VR256:$src2),
+            (VPBLENDVBYrr VR256:$src1, VR256:$src2, VR256:$mask)>;
+  def : Pat<(v16i16 (X86Blendi (v16i16 VR256:$src1), (v16i16 VR256:$src2),
                                (imm:$mask))),
-            (VPBLENDWYrri VR256:$src2, VR256:$src1, imm:$mask)>;
+            (VPBLENDWYrri VR256:$src1, VR256:$src2, imm:$mask)>;
 }
 
 /// SS41I_ternary_int - SSE 4.1 ternary operator
@@ -6979,15 +6973,15 @@ let Predicates = [UseSSE41] in {
                             (v2f64 VR128:$src2))),
             (BLENDVPDrr0 VR128:$src2, VR128:$src1)>;
 
-  def : Pat<(v8i16 (X86Blendpw (v8i16 VR128:$src1), (v8i16 VR128:$src2),
+  def : Pat<(v8i16 (X86Blendi (v8i16 VR128:$src1), (v8i16 VR128:$src2),
                                (imm:$mask))),
-            (PBLENDWrri VR128:$src2, VR128:$src1, imm:$mask)>;
-  def : Pat<(v4f32 (X86Blendps (v4f32 VR128:$src1), (v4f32 VR128:$src2),
+            (PBLENDWrri VR128:$src1, VR128:$src2, imm:$mask)>;
+  def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$src1), (v4f32 VR128:$src2),
                                (imm:$mask))),
-            (BLENDPSrri VR128:$src2, VR128:$src1, imm:$mask)>;
-  def : Pat<(v2f64 (X86Blendpd (v2f64 VR128:$src1), (v2f64 VR128:$src2),
+            (BLENDPSrri VR128:$src1, VR128:$src2, imm:$mask)>;
+  def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$src1), (v2f64 VR128:$src2),
                                (imm:$mask))),
-            (BLENDPDrri VR128:$src2, VR128:$src1, imm:$mask)>;
+            (BLENDPDrri VR128:$src1, VR128:$src2, imm:$mask)>;
 
 }
 
@@ -7873,6 +7867,13 @@ defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
                                     VR256, memopv4i64, i256mem>, VEX_L;
 }
 
+def : Pat<(v4i32 (X86Blendi (v4i32 VR128:$src1), (v4i32 VR128:$src2),
+                  imm:$mask)),
+          (VPBLENDDrri VR128:$src1, VR128:$src2, imm:$mask)>;
+def : Pat<(v8i32 (X86Blendi (v8i32 VR256:$src1), (v8i32 VR256:$src2),
+                  imm:$mask)),
+          (VPBLENDDYrri VR256:$src1, VR256:$src2, imm:$mask)>;
+
 //===----------------------------------------------------------------------===//
 // VPBROADCAST - Load from memory and broadcast to all elements of the
 //               destination operand
diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h
index d7c08dfb0f..7b0f3434d0 100644
--- a/lib/Target/X86/X86JITInfo.h
+++ b/lib/Target/X86/X86JITInfo.h
@@ -14,8 +14,8 @@
 #ifndef X86JITINFO_H
 #define X86JITINFO_H
 
-#include "llvm/Function.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Function.h"
 #include "llvm/Target/TargetJITInfo.h"
 
 namespace llvm {
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 083d75666f..9b6d1d4c13 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -13,9 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86AsmPrinter.h"
-#include "X86COFFMachineModuleInfo.h"
 #include "InstPrinter/X86ATTInstPrinter.h"
-#include "llvm/Type.h"
+#include "X86COFFMachineModuleInfo.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
@@ -24,9 +24,9 @@
 #include "llvm/MC/MCInstBuilder.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Target/X86/X86NaClRewriteFinalPass.cpp b/lib/Target/X86/X86NaClRewriteFinalPass.cpp
index b6276dc583..e850dcaf9e 100644
--- a/lib/Target/X86/X86NaClRewriteFinalPass.cpp
+++ b/lib/Target/X86/X86NaClRewriteFinalPass.cpp
@@ -181,7 +181,6 @@ bool X86NaClRewriteFinalPass::ApplyCommonRewrites(MachineBasicBlock &MBB,
   case X86::NACL_RESTBPr:
   case X86::NACL_RESTSPm:
   case X86::NACL_RESTSPr:
-    dbgs() << "inst, opcode not handled: " << MI << Opcode;
     assert(false && "NaCl Pseudo-inst not handled");
   case X86::NACL_RET32:
   case X86::NACL_RET64:
@@ -200,8 +199,6 @@ bool X86NaClRewriteFinalPass::runOnMachineFunction(MachineFunction &MF) {
   assert(subtarget->isTargetNaCl() && "Target in NaClRewriteFinal is not NaCl");
 
   DEBUG(dbgs() << "*************** NaCl Rewrite Final ***************\n");
-  DEBUG(dbgs() << " funcnum " << MF.getFunctionNumber() << " "
-               << MF.getFunction()->getName() << "\n");
 
   for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); 
        MFI != E; ++MFI) {
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index f95071792a..1b16d6d8f7 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -19,25 +19,25 @@
 #include "X86MachineFunctionInfo.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Type.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/Type.h"
 
 #define GET_REGINFO_TARGET_DESC
 #include "X86GenRegisterInfo.inc"
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index a102935b4b..8c7e09c324 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -13,8 +13,8 @@
 
 #define DEBUG_TYPE "x86-selectiondag-info"
 #include "X86TargetMachine.h"
-#include "llvm/DerivedTypes.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DerivedTypes.h"
 using namespace llvm;
 
 X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 0132f81410..52175bcd6c 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -17,8 +17,8 @@
 #include "llvm/GlobalValue.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Host.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 
@@ -242,12 +242,20 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
       ToggleFeature(X86::FeatureSlowBTMem);
     }
 
-    // If it's Nehalem, unaligned memory access is fast.
-    // Include Westmere and Sandy Bridge as well.
-    // FIXME: add later processors.
-    if (IsIntel && ((Family == 6 && Model == 26) ||
-        (Family == 6 && Model == 44) ||
-        (Family == 6 && Model == 42))) {
+    // If it's an Intel chip since Nehalem and not an Atom chip, unaligned
+    // memory access is fast. We hard code model numbers here because they
+    // aren't strictly increasing for Intel chips it seems.
+    if (IsIntel &&
+        ((Family == 6 && Model == 0x1E) || // Nehalem: Clarksfield, Lynnfield,
+                                           //          Jasper Froest
+         (Family == 6 && Model == 0x1A) || // Nehalem: Bloomfield, Nehalem-EP
+         (Family == 6 && Model == 0x2E) || // Nehalem: Nehalem-EX
+         (Family == 6 && Model == 0x25) || // Westmere: Arrandale, Clarksdale
+         (Family == 6 && Model == 0x2C) || // Westmere: Gulftown, Westmere-EP
+         (Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
+         (Family == 6 && Model == 0x2A) || // SandyBridge
+         (Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
+         (Family == 6 && Model == 0x3A))) {// IvyBridge
       IsUAMemFast = true;
       ToggleFeature(X86::FeatureFastUAMem);
     }
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index f86b947574..5e5485ea49 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -14,8 +14,8 @@
 #ifndef X86SUBTARGET_H
 #define X86SUBTARGET_H
 
-#include "llvm/CallingConv.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/CallingConv.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
 
@@ -208,6 +208,8 @@ public:
   bool hasSSE42() const { return X86SSELevel >= SSE42; }
   bool hasAVX() const { return X86SSELevel >= AVX; }
   bool hasAVX2() const { return X86SSELevel >= AVX2; }
+  bool hasFp256() const { return hasAVX(); }
+  bool hasInt256() const { return hasAVX2(); }
   bool hasSSE4A() const { return HasSSE4A; }
   bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
   bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
@@ -249,7 +251,9 @@ public:
             TargetTriple.isOSBinFormatELF());
   }
   bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
-  bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }  // @LOCALMOD
+  bool isTargetNaCl() const {
+    return TargetTriple.getOS() == Triple::NaCl;
+  }
   bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
   bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
   bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 5135946c97..38a4fb12ce 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -13,13 +13,13 @@
 
 #include "X86TargetMachine.h"
 #include "X86.h"
-#include "llvm/PassManager.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
 using namespace llvm;
 
 extern "C" void LLVMInitializeX86Target() {
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 12311a1abf..792f721e76 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -15,15 +15,15 @@
 #define X86TARGETMACHINE_H
 
 #include "X86.h"
-#include "X86InstrInfo.h"
-#include "X86ISelLowering.h"
 #include "X86FrameLowering.h"
+#include "X86ISelLowering.h"
+#include "X86InstrInfo.h"
 #include "X86JITInfo.h"
 #include "X86SelectionDAGInfo.h"
 #include "X86Subtarget.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/DataLayout.h"
 #include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetTransformImpl.h"
 
 namespace llvm {
@@ -118,7 +118,7 @@ class X86_64TargetMachine : public X86TargetMachine {
   X86SelectionDAGInfo TSInfo;
   X86TargetLowering TLInfo;
   X86JITInfo        JITInfo;
-  ScalarTargetTransformImpl STTI;
+  X86ScalarTargetTransformImpl STTI;
   X86VectorTargetTransformInfo VTTI;
 public:
   X86_64TargetMachine(const Target &T, StringRef TT,
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 1cfaeda0eb..4a1788a85e 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -16,9 +16,9 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
-#include "llvm/Target/Mangler.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ELF.h"
+#include "llvm/Target/Mangler.h"
 using namespace llvm;
 using namespace dwarf;
 
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 2a382e25af..09cb6237b9 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -11,8 +11,8 @@
 #define LLVM_TARGET_X86_TARGETOBJECTFILE_H
 
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
 
 namespace llvm {
 
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index caae562272..6760641efe 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -17,28 +17,28 @@
 #include "XCoreInstrInfo.h"
 #include "XCoreSubtarget.h"
 #include "XCoreTargetMachine.h"
-#include "llvm/Constants.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
 #include <algorithm>
 #include <cctype>
 using namespace llvm;
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index e18d97384d..9257226f80 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -16,7 +16,6 @@
 #include "XCore.h"
 #include "XCoreInstrInfo.h"
 #include "XCoreMachineFunctionInfo.h"
-#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -24,8 +23,9 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Function.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetOptions.h"
 
 using namespace llvm;
 
@@ -98,7 +98,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const {
   DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
 
   bool FP = hasFP(MF);
-  const AttrListPtr &PAL = MF.getFunction()->getAttributes();
+  const AttributeSet &PAL = MF.getFunction()->getAttributes();
 
   for (unsigned I = 0, E = PAL.getNumAttrs(); I != E; ++I)
     if (PAL.getAttributesAtIndex(I).hasAttribute(Attributes::Nest)) {
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 7564fbad7d..459664bf58 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -13,23 +13,23 @@
 
 #include "XCore.h"
 #include "XCoreTargetMachine.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
 #include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
 using namespace llvm;
 
 /// XCoreDAGToDAGISel - XCore specific code to select XCore machine
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index f1098f9dc3..391660a095 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -14,17 +14,12 @@
 #define DEBUG_TYPE "xcore-lower"
 
 #include "XCoreISelLowering.h"
-#include "XCoreMachineFunctionInfo.h"
 #include "XCore.h"
-#include "XCoreTargetObjectFile.h"
-#include "XCoreTargetMachine.h"
+#include "XCoreMachineFunctionInfo.h"
 #include "XCoreSubtarget.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Intrinsics.h"
+#include "XCoreTargetMachine.h"
+#include "XCoreTargetObjectFile.h"
 #include "llvm/CallingConv.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -33,6 +28,11 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 0a3008d7ab..e457e0dbf0 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -12,12 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "XCoreInstrInfo.h"
-#include "XCoreMachineFunctionInfo.h"
 #include "XCore.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "XCoreMachineFunctionInfo.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/TargetRegistry.h"
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index f869fcf26d..69d5de3e03 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -14,8 +14,8 @@
 #ifndef XCOREMACHINEFUNCTIONINFO_H
 #define XCOREMACHINEFUNCTIONINFO_H
 
-#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include <vector>
 
 namespace llvm {
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index be5855abcd..d8517d7b4e 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -12,25 +12,25 @@
 //===----------------------------------------------------------------------===//
 
 #include "XCoreRegisterInfo.h"
-#include "XCoreMachineFunctionInfo.h"
 #include "XCore.h"
-#include "llvm/Type.h"
-#include "llvm/Function.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "XCoreMachineFunctionInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Function.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Type.h"
 
 #define GET_REGINFO_TARGET_DESC
 #include "XCoreGenRegisterInfo.inc"
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index 8d0f254e08..5ac4dbc4bc 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -14,8 +14,8 @@
 #ifndef XCORESUBTARGET_H
 #define XCORESUBTARGET_H
 
-#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include <string>
 
 #define GET_SUBTARGETINFO_HEADER
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index d5a932c518..b130fb9daa 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -12,9 +12,9 @@
 
 #include "XCoreTargetMachine.h"
 #include "XCore.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
-#include "llvm/CodeGen/Passes.h"
 #include "llvm/Support/TargetRegistry.h"
 using namespace llvm;
 
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index c60c6a37f9..42bfcb4a03 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -15,13 +15,13 @@
 #define XCORETARGETMACHINE_H
 
 #include "XCoreFrameLowering.h"
-#include "XCoreSubtarget.h"
-#include "XCoreInstrInfo.h"
 #include "XCoreISelLowering.h"
+#include "XCoreInstrInfo.h"
 #include "XCoreSelectionDAGInfo.h"
+#include "XCoreSubtarget.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetTransformImpl.h"
-#include "llvm/DataLayout.h"
 
 namespace llvm {
 
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp
index 7f4e1c1b4f..820389935b 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -11,8 +11,8 @@
 #include "XCoreSubtarget.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCSectionELF.h"
-#include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetMachine.h"
 using namespace llvm;
 
 
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index b0e22de8d7..d0b146b4e9 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -13,10 +13,10 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "hello"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Function.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(HelloCounter, "Counts number of functions greeted");
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index be48b2063f..2132e0a5fe 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -31,21 +31,21 @@
 
 #define DEBUG_TYPE "argpromotion"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/CallGraphSCCPass.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/CallGraphSCCPass.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
 #include <set>
 using namespace llvm;
 
@@ -515,12 +515,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   // that we are *not* promoting. For the ones that we do promote, the parameter
   // attributes are lost
   SmallVector<AttributeWithIndex, 8> AttributesVec;
-  const AttrListPtr &PAL = F->getAttributes();
+  const AttributeSet &PAL = F->getAttributes();
 
   // Add any return attributes.
   Attributes attrs = PAL.getRetAttributes();
   if (attrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+    AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
                                                     attrs));
 
   // First, determine the new argument list
@@ -593,7 +593,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   // Add any function attributes.
   attrs = PAL.getFnAttributes();
   if (attrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+    AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                                     attrs));
 
   Type *RetTy = FTy->getReturnType();
@@ -611,7 +611,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   
   // Recompute the parameter attributes list based on the new arguments for
   // the function.
-  NF->setAttributes(AttrListPtr::get(F->getContext(), AttributesVec));
+  NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec));
   AttributesVec.clear();
 
   F->getParent()->getFunctionList().insert(F, NF);
@@ -636,12 +636,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
     CallSite CS(F->use_back());
     assert(CS.getCalledFunction() == F);
     Instruction *Call = CS.getInstruction();
-    const AttrListPtr &CallPAL = CS.getAttributes();
+    const AttributeSet &CallPAL = CS.getAttributes();
 
     // Add any return attributes.
     Attributes attrs = CallPAL.getRetAttributes();
     if (attrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
                                                       attrs));
 
     // Loop over the operands, inserting GEP and loads in the caller as
@@ -723,7 +723,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
     // Add any function attributes.
     attrs = CallPAL.getFnAttributes();
     if (attrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                                       attrs));
 
     Instruction *New;
@@ -731,12 +731,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
       New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
                                Args, "", Call);
       cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
-      cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(II->getContext(),
+      cast<InvokeInst>(New)->setAttributes(AttributeSet::get(II->getContext(),
                                                             AttributesVec));
     } else {
       New = CallInst::Create(NF, Args, "", Call);
       cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
-      cast<CallInst>(New)->setAttributes(AttrListPtr::get(New->getContext(),
+      cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
                                                           AttributesVec));
       if (cast<CallInst>(Call)->isTailCall())
         cast<CallInst>(New)->setTailCall();
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index e2f012657f..d30eeaf7d3 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -19,15 +19,15 @@
 
 #define DEBUG_TYPE "constmerge"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/DataLayout.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 using namespace llvm;
 
 STATISTIC(NumMerged, "Number of global constants merged");
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 4cfd0b235a..6236a04fc2 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -19,11 +19,15 @@
 
 #define DEBUG_TYPE "deadargelim"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constant.h"
+#include "llvm/DIBuilder.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/DIBuilder.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
@@ -32,10 +36,6 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
 #include <map>
 #include <set>
 using namespace llvm;
@@ -271,16 +271,16 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
     Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
 
     // Drop any attributes that were on the vararg arguments.
-    AttrListPtr PAL = CS.getAttributes();
+    AttributeSet PAL = CS.getAttributes();
     if (!PAL.isEmpty() && PAL.getSlot(PAL.getNumSlots() - 1).Index > NumArgs) {
       SmallVector<AttributeWithIndex, 8> AttributesVec;
       for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
         AttributesVec.push_back(PAL.getSlot(i));
       Attributes FnAttrs = PAL.getFnAttributes();
       if (FnAttrs.hasAttributes())
-        AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+        AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                                         FnAttrs));
-      PAL = AttrListPtr::get(Fn.getContext(), AttributesVec);
+      PAL = AttributeSet::get(Fn.getContext(), AttributesVec);
     }
 
     Instruction *New;
@@ -698,7 +698,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
 
   // Set up to build a new list of parameter attributes.
   SmallVector<AttributeWithIndex, 8> AttributesVec;
-  const AttrListPtr &PAL = F->getAttributes();
+  const AttributeSet &PAL = F->getAttributes();
 
   // The existing function return attributes.
   Attributes RAttrs = PAL.getRetAttributes();
@@ -773,7 +773,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
            "Return attributes no longer compatible?");
 
   if (RAttrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+    AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
                                                     RAttrs));
 
   // Remember which arguments are still alive.
@@ -802,11 +802,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   }
 
   if (FnAttrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+    AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                                     FnAttrs));
 
   // Reconstruct the AttributesList based on the vector we constructed.
-  AttrListPtr NewPAL = AttrListPtr::get(F->getContext(), AttributesVec);
+  AttributeSet NewPAL = AttributeSet::get(F->getContext(), AttributesVec);
 
   // Create the new function type based on the recomputed parameters.
   FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
@@ -833,7 +833,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
     Instruction *Call = CS.getInstruction();
 
     AttributesVec.clear();
-    const AttrListPtr &CallPAL = CS.getAttributes();
+    const AttributeSet &CallPAL = CS.getAttributes();
 
     // The call return attributes.
     Attributes RAttrs = CallPAL.getRetAttributes();
@@ -843,7 +843,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       Attributes::get(NF->getContext(), AttrBuilder(RAttrs).
            removeAttributes(Attributes::typeIncompatible(NF->getReturnType())));
     if (RAttrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
                                                       RAttrs));
 
     // Declare these outside of the loops, so we can reuse them for the second
@@ -870,11 +870,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
     }
 
     if (FnAttrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+      AttributesVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                                       FnAttrs));
 
     // Reconstruct the AttributesList based on the vector we constructed.
-    AttrListPtr NewCallPAL = AttrListPtr::get(F->getContext(), AttributesVec);
+    AttributeSet NewCallPAL = AttributeSet::get(F->getContext(), AttributesVec);
 
     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 05aefeff9f..1dc79d113e 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -11,13 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Constants.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/SetVector.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 18409f77b3..685833da1a 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -20,17 +20,17 @@
 
 #define DEBUG_TYPE "functionattrs"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/InstIterator.h"
 using namespace llvm;
 
@@ -215,13 +215,13 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
     AttrBuilder B;
     B.addAttribute(Attributes::ReadOnly)
       .addAttribute(Attributes::ReadNone);
-    F->removeAttribute(AttrListPtr::FunctionIndex,
+    F->removeAttribute(AttributeSet::FunctionIndex,
                        Attributes::get(F->getContext(), B));
 
     // Add in the new attribute.
     B.clear();
     B.addAttribute(ReadsMemory ? Attributes::ReadOnly : Attributes::ReadNone);
-    F->addAttribute(AttrListPtr::FunctionIndex,
+    F->addAttribute(AttributeSet::FunctionIndex,
                     Attributes::get(F->getContext(), B));
 
     if (ReadsMemory)
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 18c1c7b000..b2c819de2b 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -17,11 +17,11 @@
 
 #define DEBUG_TYPE "globaldce"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Constants.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumAliases  , "Number of global aliases removed");
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 591278fa62..20f9de5a83 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -15,29 +15,29 @@
 
 #define DEBUG_TYPE "globalopt"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
 #include "llvm/Operator.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -148,17 +148,13 @@ struct GlobalStatus {
   /// an instruction (e.g. a constant expr or GV initializer).
   bool HasNonInstructionUser;
 
-  /// HasPHIUser - Set to true if this global has a user that is a PHI node.
-  bool HasPHIUser;
-
   /// AtomicOrdering - Set to the strongest atomic ordering requirement.
   AtomicOrdering Ordering;
 
   GlobalStatus() : isCompared(false), isLoaded(false), StoredType(NotStored),
                    StoredOnceValue(0), AccessingFunction(0),
                    HasMultipleAccessingFunctions(false),
-                   HasNonInstructionUser(false), HasPHIUser(false),
-                   Ordering(NotAtomic) {}
+                   HasNonInstructionUser(false), Ordering(NotAtomic) {}
 };
 
 }
@@ -200,11 +196,11 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
     const User *U = *UI;
     if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
       GS.HasNonInstructionUser = true;
-      
+
       // If the result of the constantexpr isn't pointer type, then we won't
       // know to expect it in various places.  Just reject early.
       if (!isa<PointerType>(CE->getType())) return true;
-      
+
       if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
     } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
       if (!GS.HasMultipleAccessingFunctions) {
@@ -274,7 +270,6 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
         // have to be careful about infinite recursion.
         if (PHIUsers.insert(PN))  // Not already visited.
           if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
-        GS.HasPHIUser = true;
       } else if (isa<CmpInst>(I)) {
         GS.isCompared = true;
       } else if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
@@ -2070,7 +2065,7 @@ static void ChangeCalleesToFastCall(Function *F) {
   }
 }
 
-static AttrListPtr StripNest(LLVMContext &C, const AttrListPtr &Attrs) {
+static AttributeSet StripNest(LLVMContext &C, const AttributeSet &Attrs) {
   for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
     if (!Attrs.getSlot(i).Attrs.hasAttribute(Attributes::Nest))
       continue;
@@ -2157,7 +2152,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
 GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
   GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
   if (GV == 0) return 0;
-  
+
   // Verify that the initializer is simple enough for us to handle. We are
   // only allowed to optimize the initializer if it is unique.
   if (!GV->hasUniqueInitializer()) return 0;
@@ -2263,7 +2258,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
 }
 
 
-static inline bool 
+static inline bool
 isSimpleEnoughValueToCommit(Constant *C,
                             SmallPtrSet<Constant*, 8> &SimpleConstants,
                             const DataLayout *TD);
@@ -2285,7 +2280,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
   if (C->getNumOperands() == 0 || isa<BlockAddress>(C) ||
       isa<GlobalValue>(C))
     return true;
-  
+
   // Aggregate values are safe if all their elements are.
   if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
       isa<ConstantVector>(C)) {
@@ -2296,7 +2291,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
     }
     return true;
   }
-  
+
   // We don't know exactly what relocations are allowed in constant expressions,
   // so we allow &global+constantoffset, which is safe and uniformly supported
   // across targets.
@@ -2314,14 +2309,14 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
                TD->getTypeSizeInBits(CE->getOperand(0)->getType()))
       return false;
     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
-      
+
   // GEP is fine if it is simple + constant offset.
   case Instruction::GetElementPtr:
     for (unsigned i = 1, e = CE->getNumOperands(); i != e; ++i)
       if (!isa<ConstantInt>(CE->getOperand(i)))
         return false;
     return isSimpleEnoughValueToCommit(CE->getOperand(0), SimpleConstants, TD);
-      
+
   case Instruction::Add:
     // We allow simple+cst.
     if (!isa<ConstantInt>(CE->getOperand(1)))
@@ -2331,7 +2326,7 @@ static bool isSimpleEnoughValueToCommitHelper(Constant *C,
   return false;
 }
 
-static inline bool 
+static inline bool
 isSimpleEnoughValueToCommit(Constant *C,
                             SmallPtrSet<Constant*, 8> &SimpleConstants,
                             const DataLayout *TD) {
@@ -2379,7 +2374,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
         return false;
 
       return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
-    
+
     // A constantexpr bitcast from a pointer to another pointer is a no-op,
     // and we know how to evaluate it by moving the bitcast from the pointer
     // operand to the value operand.
@@ -2390,7 +2385,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
       return cast<GlobalVariable>(CE->getOperand(0))->hasUniqueInitializer();
     }
   }
-  
+
   return false;
 }
 
@@ -2420,7 +2415,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
     // Return the modified struct.
     return ConstantStruct::get(STy, Elts);
   }
-  
+
   ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
   SequentialType *InitTy = cast<SequentialType>(Init->getType());
 
@@ -2597,23 +2592,23 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
       if (!isSimpleEnoughPointerToCommit(Ptr))
         // If this is too complex for us to commit, reject it.
         return false;
-      
+
       Constant *Val = getVal(SI->getOperand(0));
 
       // If this might be too difficult for the backend to handle (e.g. the addr
       // of one global variable divided by another) then we can't commit it.
       if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, TD))
         return false;
-        
+
       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
         if (CE->getOpcode() == Instruction::BitCast) {
           // If we're evaluating a store through a bitcast, then we need
           // to pull the bitcast off the pointer type and push it onto the
           // stored value.
           Ptr = CE->getOperand(0);
-          
+
           Type *NewTy = cast<PointerType>(Ptr->getType())->getElementType();
-          
+
           // In order to push the bitcast onto the stored value, a bitcast
           // from NewTy to Val's type must be legal.  If it's not, we can try
           // introspecting NewTy to find a legal conversion.
@@ -2638,12 +2633,12 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
               return false;
             }
           }
-          
+
           // If we found compatible types, go ahead and push the bitcast
           // onto the stored value.
           Val = ConstantExpr::getBitCast(Val, NewTy);
         }
-          
+
       MutatedMemory[Ptr] = Val;
     } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CurInst)) {
       InstResult = ConstantExpr::get(BO->getOpcode(),
@@ -2805,7 +2800,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
     if (!CurInst->use_empty()) {
       if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
         InstResult = ConstantFoldConstantExpression(CE, TD, TLI);
-      
+
       setVal(CurInst, InstResult);
     }
 
@@ -2891,7 +2886,7 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *TD,
   Constant *RetValDummy;
   bool EvalSuccess = Eval.EvaluateFunction(F, RetValDummy,
                                            SmallVector<Constant*, 0>());
-  
+
   if (EvalSuccess) {
     // We succeeded at evaluation: commit the result.
     DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
@@ -3011,13 +3006,13 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
     return 0;
 
   Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
-  
+
   if (!Fn)
     return 0;
 
   FunctionType *FTy = Fn->getFunctionType();
-  
-  // Checking that the function has the right return type, the right number of 
+
+  // Checking that the function has the right return type, the right number of
   // parameters and that they all have pointer types should be enough.
   if (!FTy->getReturnType()->isIntegerTy() ||
       FTy->getNumParams() != 3 ||
@@ -3092,7 +3087,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
   // and remove them.
   bool Changed = false;
 
-  for (Function::use_iterator I = CXAAtExitFn->use_begin(), 
+  for (Function::use_iterator I = CXAAtExitFn->use_begin(),
        E = CXAAtExitFn->use_end(); I != E;) {
     // We're only interested in calls. Theoretically, we could handle invoke
     // instructions as well, but neither llvm-gcc nor clang generate invokes
@@ -3101,7 +3096,7 @@ bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) {
     if (!CI)
       continue;
 
-    Function *DtorFn = 
+    Function *DtorFn =
       dyn_cast<Function>(CI->getArgOperand(0)->stripPointerCasts());
     if (!DtorFn)
       continue;
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index d757e1fdb1..252b5b0584 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -17,14 +17,14 @@
 
 #define DEBUG_TYPE "ipconstprop"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
 STATISTIC(NumArgumentsProped, "Number of args turned into constants");
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 6f4b810acc..5b8832e5d7 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -13,18 +13,18 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "inline"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/CallingConv.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/DataLayout.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Type.h"
 
 using namespace llvm;
 
@@ -44,6 +44,10 @@ namespace {
     }
     static char ID; // Pass identification, replacement for typeid
     virtual InlineCost getInlineCost(CallSite CS);
+
+    using llvm::Pass::doInitialization;
+    using llvm::Pass::doFinalization;
+
     virtual bool doFinalization(CallGraph &CG) {
       return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
     }
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index bf0b1f91a2..9c5feba08b 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -12,17 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "inline"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
 #include "llvm/CallingConv.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/InlineCost.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Type.h"
 
 using namespace llvm;
 
@@ -42,6 +42,7 @@ namespace {
     InlineCost getInlineCost(CallSite CS) {
       return CA.getInlineCost(CS, getInlineThreshold(CS));
     }
+    using llvm::Pass::doInitialization;
     virtual bool doInitialization(CallGraph &CG);
   };
 }
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index abcb25fd45..bd8fa66d52 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -14,22 +14,22 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "inline"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
 STATISTIC(NumInlined, "Number of functions inlined");
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index aa629cc0c6..b2cd3a765a 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -14,14 +14,14 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "internalize"
-#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
 #include <fstream>
 #include <set>
 using namespace llvm;
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 97d7cdced0..af04d054ed 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -16,16 +16,16 @@
 
 #define DEBUG_TYPE "loop-extract"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopPass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/CodeExtractor.h"
-#include "llvm/ADT/Statistic.h"
 #include <fstream>
 #include <set>
 using namespace llvm;
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 44283ddce7..70345b8334 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -45,7 +45,13 @@
 
 #define DEBUG_TYPE "mergefunc"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
@@ -53,17 +59,11 @@
 #include "llvm/Module.h"
 #include "llvm/Operator.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 #include <vector>
 using namespace llvm;
 
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 9c9910bd5c..6bd9c8372e 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -14,14 +14,14 @@
 
 #define DEBUG_TYPE "partialinlining"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/Dominators.h"
+#include "llvm/Support/CFG.h"
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/CodeExtractor.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CFG.h"
 using namespace llvm;
 
 STATISTIC(NumPartialInlined, "Number of functions partially inlined");
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 48e20ec339..a9a9f2eece 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -14,21 +14,19 @@
 
 
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
-
 #include "llvm-c/Transforms/PassManagerBuilder.h"
-
-#include "llvm/PassManager.h"
-#include "llvm/DefaultPasses.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/Verifier.h"
+#include "llvm/DefaultPasses.h"
+#include "llvm/PassManager.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/IPO.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Vectorize.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ManagedStatic.h"
 
 using namespace llvm;
 
@@ -190,10 +188,8 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
   MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
   MPM.add(createLoopDeletionPass());          // Delete dead loops
 
-  if (LoopVectorize) {
+  if (LoopVectorize && OptLevel > 1)
     MPM.add(createLoopVectorizePass());
-    MPM.add(createLICMPass());
-  }
 
   if (!DisableUnrollLoops)
     MPM.add(createLoopUnrollPass());          // Unroll small loops
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index fb4ecbfe7b..19f34837c7 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -16,16 +16,16 @@
 
 #define DEBUG_TYPE "prune-eh"
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/CallGraphSCCPass.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/CFG.h"
 #include <algorithm>
 using namespace llvm;
@@ -145,8 +145,8 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
         NewAttributes.addAttribute(Attributes::NoReturn);
 
       Function *F = (*I)->getFunction();
-      const AttrListPtr &PAL = F->getAttributes();
-      const AttrListPtr &NPAL = PAL.addAttr(F->getContext(), ~0,
+      const AttributeSet &PAL = F->getAttributes();
+      const AttributeSet &NPAL = PAL.addAttr(F->getContext(), ~0,
                                             Attributes::get(F->getContext(),
                                                             NewAttributes));
       if (PAL != NPAL) {
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index b5f09eccca..80cb869f02 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -16,9 +16,9 @@
 
 #define DEBUG_TYPE "strip-dead-prototypes"
 #include "llvm/Transforms/IPO.h"
-#include "llvm/Pass.h"
-#include "llvm/Module.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 using namespace llvm;
 
 STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 80bfc1cdb2..ad915d716f 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -21,17 +21,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Constants.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/TypeFinder.h"
 #include "llvm/ValueSymbolTable.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
 using namespace llvm;
 
 namespace {
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 7467eca7ab..0570104205 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -11,12 +11,12 @@
 #define INSTCOMBINE_INSTCOMBINE_H
 
 #include "InstCombineWorklist.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IRBuilder.h"
+#include "llvm/InstVisitor.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Operator.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/TargetFolder.h"
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
 
@@ -327,6 +327,11 @@ private:
   bool SimplifyDemandedBits(Use &U, APInt DemandedMask, 
                             APInt& KnownZero, APInt& KnownOne,
                             unsigned Depth=0);
+  /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
+  /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence.
+  Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
+                                    APInt DemandedMask, APInt &KnownZero,
+                                    APInt &KnownOne);
       
   /// SimplifyDemandedInstructionBits - Inst is an integer instruction that
   /// SimplifyDemandedBits knows about.  See if the instruction has any
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 12faedb0ff..f5c42a7983 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -12,11 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/Intrinsics.h"
 #include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Transforms/Utils/CmpInstAnalysis.h"
 using namespace llvm;
 using namespace PatternMatch;
 
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index aa9512cf03..41fe0873b3 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -12,13 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/DataLayout.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
+STATISTIC(NumSimplified, "Number of library calls simplified");
+
 /// getPromotedType - Return the specified type promoted as it would be to pass
 /// though a va_arg area.
 static Type *getPromotedType(Type *Ty) {
@@ -785,8 +788,10 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) {
   if (CI->getCalledFunction() == 0) return 0;
 
-  if (Value *With = Simplifier->optimizeCall(CI))
-    return ReplaceInstUsesWith(*CI, With);
+  if (Value *With = Simplifier->optimizeCall(CI)) {
+    ++NumSimplified;
+    return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
+  }
 
   return 0;
 }
@@ -977,7 +982,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   if (Callee == 0)
     return false;
   Instruction *Caller = CS.getInstruction();
-  const AttrListPtr &CallerPAL = CS.getAttributes();
+  const AttributeSet &CallerPAL = CS.getAttributes();
 
   // Okay, this is a cast from a function to a different type.  Unless doing so
   // would cause a type conversion of one of our arguments, change this call to
@@ -1118,7 +1123,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
   // Add the new return attributes.
   if (RAttrs.hasAttributes())
     attrVec.push_back(
-      AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+      AttributeWithIndex::get(AttributeSet::ReturnIndex,
                               Attributes::get(FT->getContext(), RAttrs)));
 
   AI = CS.arg_begin();
@@ -1173,13 +1178,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 
   Attributes FnAttrs = CallerPAL.getFnAttributes();
   if (FnAttrs.hasAttributes())
-    attrVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+    attrVec.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                               FnAttrs));
 
   if (NewRetTy->isVoidTy())
     Caller->setName("");   // Void type should not have a name.
 
-  const AttrListPtr &NewCallerPAL = AttrListPtr::get(Callee->getContext(),
+  const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(),
                                                      attrVec);
 
   Instruction *NC;
@@ -1240,7 +1245,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
   Value *Callee = CS.getCalledValue();
   PointerType *PTy = cast<PointerType>(Callee->getType());
   FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
-  const AttrListPtr &Attrs = CS.getAttributes();
+  const AttributeSet &Attrs = CS.getAttributes();
 
   // If the call already has the 'nest' attribute somewhere then give up -
   // otherwise 'nest' would occur twice after splicing in the chain.
@@ -1255,7 +1260,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
   PointerType *NestFPTy = cast<PointerType>(NestF->getType());
   FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
 
-  const AttrListPtr &NestAttrs = NestF->getAttributes();
+  const AttributeSet &NestAttrs = NestF->getAttributes();
   if (!NestAttrs.isEmpty()) {
     unsigned NestIdx = 1;
     Type *NestTy = 0;
@@ -1285,7 +1290,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
       // Add any result attributes.
       Attributes Attr = Attrs.getRetAttributes();
       if (Attr.hasAttributes())
-        NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+        NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::ReturnIndex,
                                                    Attr));
 
       {
@@ -1318,7 +1323,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
       // Add any function attributes.
       Attr = Attrs.getFnAttributes();
       if (Attr.hasAttributes())
-        NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+        NewAttrs.push_back(AttributeWithIndex::get(AttributeSet::FunctionIndex,
                                                    Attr));
 
       // The trampoline may have been bitcast to a bogus type (FTy).
@@ -1358,7 +1363,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
         NestF->getType() == PointerType::getUnqual(NewFTy) ?
         NestF : ConstantExpr::getBitCast(NestF,
                                          PointerType::getUnqual(NewFTy));
-      const AttrListPtr &NewPAL = AttrListPtr::get(FTy->getContext(), NewAttrs);
+      const AttributeSet &NewPAL = AttributeSet::get(FTy->getContext(), NewAttrs);
 
       Instruction *NewCaller;
       if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index bb59db8e7b..19de62c81f 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -14,8 +14,8 @@
 #include "InstCombine.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 using namespace PatternMatch;
 
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7c3f8fe15d..1b96c3cca4 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -12,15 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 using namespace llvm;
 using namespace PatternMatch;
 
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index fd684200fc..5726d3a91d 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -12,12 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/DataLayout.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index cefe45ec86..5cd611c420 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -13,8 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -252,6 +252,46 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
   return Changed ? &I : 0;
 }
 
+//
+// Detect pattern:
+//
+// log2(Y*0.5)
+//
+// And check for corresponding fast math flags
+//
+
+static void detectLog2OfHalf(Value *&Op, Value *&Y, IntrinsicInst *&Log2) {
+
+   if (!Op->hasOneUse())
+     return;
+
+   IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op);
+   if (!II)
+     return;
+   if (II->getIntrinsicID() != Intrinsic::log2 || !II->hasUnsafeAlgebra())
+     return;
+   Log2 = II;
+
+   Value *OpLog2Of = II->getArgOperand(0);
+   if (!OpLog2Of->hasOneUse())
+     return;
+
+   Instruction *I = dyn_cast<Instruction>(OpLog2Of);
+   if (!I)
+     return;
+   if (I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
+     return;
+              
+   ConstantFP *CFP = dyn_cast<ConstantFP>(I->getOperand(0));
+   if (CFP && CFP->isExactlyValue(0.5)) {
+     Y = I->getOperand(1);
+     return;
+   }
+   CFP = dyn_cast<ConstantFP>(I->getOperand(1));
+   if (CFP && CFP->isExactlyValue(0.5))
+     Y = I->getOperand(0);
+} 
+
 Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
   bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -284,6 +324,33 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
     if (Value *Op1v = dyn_castFNegVal(Op1))
       return BinaryOperator::CreateFMul(Op0v, Op1v);
 
+  // Under unsafe algebra do:
+  // X * log2(0.5*Y) = X*log2(Y) - X
+  if (I.hasUnsafeAlgebra()) {
+    Value *OpX = NULL;
+    Value *OpY = NULL;
+    IntrinsicInst *Log2;
+    detectLog2OfHalf(Op0, OpY, Log2);
+    if (OpY) {
+      OpX = Op1;
+    } else {
+      detectLog2OfHalf(Op1, OpY, Log2);
+      if (OpY) {
+        OpX = Op0;
+      }
+    }
+    // if pattern detected emit alternate sequence
+    if (OpX && OpY) {
+      Log2->setArgOperand(0, OpY);
+      Value *FMulVal = Builder->CreateFMul(OpX, Log2);
+      Instruction *FMul = cast<Instruction>(FMulVal);
+      FMul->copyFastMathFlags(Log2);
+      Instruction *FSub = BinaryOperator::CreateFSub(FMulVal, OpX);
+      FSub->copyFastMathFlags(Log2);
+      return FSub;
+    }
+  }
+
   return Changed ? &I : 0;
 }
 
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index de9c77e600..ea127e9f53 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -12,10 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/DataLayout.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/STLExtras.h"
 using namespace llvm;
 
 /// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index a2d4c888f2..a262d711d3 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/Support/PatternMatch.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
 
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 57021f1bef..8a28d8eaa2 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/PatternMatch.h"
 using namespace llvm;
 using namespace PatternMatch;
@@ -49,7 +49,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
     I.setOperand(1, Rem);
     return &I;
   }
-  
+
   return 0;
 }
 
@@ -70,10 +70,10 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
   // We can always evaluate constants shifted.
   if (isa<Constant>(V))
     return true;
-  
+
   Instruction *I = dyn_cast<Instruction>(V);
   if (!I) return false;
-  
+
   // If this is the opposite shift, we can directly reuse the input of the shift
   // if the needed bits are already zero in the input.  This allows us to reuse
   // the value which means that we don't care if the shift has multiple uses.
@@ -95,14 +95,14 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
         return CanEvaluateTruncated(I->getOperand(0), Ty);
       }
 #endif
-      
+
     }
   }
-  
+
   // We can't mutate something that has multiple uses: doing so would
   // require duplicating the instruction in general, which isn't profitable.
   if (!I->hasOneUse()) return false;
-  
+
   switch (I->getOpcode()) {
   default: return false;
   case Instruction::And:
@@ -111,7 +111,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
     // Bitwise operators can all arbitrarily be arbitrarily evaluated shifted.
     return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
            CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
-      
+
   case Instruction::Shl: {
     // We can often fold the shift into shifts-by-a-constant.
     CI = dyn_cast<ConstantInt>(I->getOperand(1));
@@ -119,10 +119,10 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
 
     // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
     if (isLeftShift) return true;
-    
+
     // We can always turn shl(c)+shr(c) -> and(c2).
     if (CI->getValue() == NumBits) return true;
-      
+
     unsigned TypeWidth = I->getType()->getScalarSizeInBits();
 
     // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
@@ -133,20 +133,20 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
                        APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
         return true;
     }
-      
+
     return false;
   }
   case Instruction::LShr: {
     // We can often fold the shift into shifts-by-a-constant.
     CI = dyn_cast<ConstantInt>(I->getOperand(1));
     if (CI == 0) return false;
-    
+
     // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
     if (!isLeftShift) return true;
-    
+
     // We can always turn lshr(c)+shl(c) -> and(c2).
     if (CI->getValue() == NumBits) return true;
-      
+
     unsigned TypeWidth = I->getType()->getScalarSizeInBits();
 
     // We can always turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
@@ -157,7 +157,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
                           APInt::getLowBitsSet(TypeWidth, NumBits) << LowBits))
         return true;
     }
-      
+
     return false;
   }
   case Instruction::Select: {
@@ -175,7 +175,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
         return false;
     return true;
   }
-  }      
+  }
 }
 
 /// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
@@ -194,7 +194,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
                                          IC.getTargetLibraryInfo());
     return V;
   }
-  
+
   Instruction *I = cast<Instruction>(V);
   IC.Worklist.Add(I);
 
@@ -207,7 +207,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
     I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
     I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
     return I;
-    
+
   case Instruction::Shl: {
     BinaryOperator *BO = cast<BinaryOperator>(I);
     unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
@@ -227,7 +227,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
       BO->setHasNoSignedWrap(false);
       return I;
     }
-    
+
     // We turn shl(c)+lshr(c) -> and(c2) if the input doesn't already have
     // zeros.
     if (CI->getValue() == NumBits) {
@@ -240,7 +240,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
       }
       return V;
     }
-    
+
     // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
     // the and won't be needed.
     assert(CI->getZExtValue() > NumBits);
@@ -255,19 +255,19 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
     unsigned TypeWidth = BO->getType()->getScalarSizeInBits();
     // We only accept shifts-by-a-constant in CanEvaluateShifted.
     ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
-    
+
     // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
     if (!isLeftShift) {
       // If this is oversized composite shift, then unsigned shifts get 0.
       unsigned NewShAmt = NumBits+CI->getZExtValue();
       if (NewShAmt >= TypeWidth)
         return Constant::getNullValue(BO->getType());
-      
+
       BO->setOperand(1, ConstantInt::get(BO->getType(), NewShAmt));
       BO->setIsExact(false);
       return I;
     }
-    
+
     // We turn lshr(c)+shl(c) -> and(c2) if the input doesn't already have
     // zeros.
     if (CI->getValue() == NumBits) {
@@ -280,7 +280,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
       }
       return V;
     }
-    
+
     // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
     // the and won't be needed.
     assert(CI->getZExtValue() > NumBits);
@@ -289,7 +289,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
     BO->setIsExact(false);
     return BO;
   }
-    
+
   case Instruction::Select:
     I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
     I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
@@ -304,7 +304,7 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
                                               NumBits, isLeftShift, IC));
     return PN;
   }
-  }      
+  }
 }
 
 
@@ -312,24 +312,24 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
 Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
                                                BinaryOperator &I) {
   bool isLeftShift = I.getOpcode() == Instruction::Shl;
-  
-  
+
+
   // See if we can propagate this shift into the input, this covers the trivial
   // cast of lshr(shl(x,c1),c2) as well as other more complex cases.
   if (I.getOpcode() != Instruction::AShr &&
       CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
     DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
               " to eliminate shift:\n  IN: " << *Op0 << "\n  SH: " << I <<"\n");
-    
-    return ReplaceInstUsesWith(I, 
+
+    return ReplaceInstUsesWith(I,
                  GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
   }
-  
-  
-  // See if we can simplify any instructions used by the instruction whose sole 
+
+
+  // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
-  
+
   // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
   // a signed shift.
   //
@@ -340,14 +340,14 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
     return &I;
   }
-  
+
   // ((X*C1) << C2) == (X * (C1 << C2))
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
     if (BO->getOpcode() == Instruction::Mul && isLeftShift)
       if (Constant *BOOp = dyn_cast<Constant>(BO->getOperand(1)))
         return BinaryOperator::CreateMul(BO->getOperand(0),
                                         ConstantExpr::getShl(BOOp, Op1));
-  
+
   // Try to fold constant and into select arguments.
   if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
     if (Instruction *R = FoldOpIntoSelect(I, SI))
@@ -355,7 +355,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
   if (isa<PHINode>(Op0))
     if (Instruction *NV = FoldOpIntoPhi(I))
       return NV;
-  
+
   // Fold shift2(trunc(shift1(x,c1)), c2) -> trunc(shift2(shift1(x,c1),c2))
   if (TruncInst *TI = dyn_cast<TruncInst>(Op0)) {
     Instruction *TrOp = dyn_cast<Instruction>(TI->getOperand(0));
@@ -364,7 +364,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     // require that the input operand is a shift-by-constant so that we have
     // confidence that the shifts will get folded together.  We could do this
     // xform in more cases, but it is unlikely to be profitable.
-    if (TrOp && I.isLogicalShift() && TrOp->isShift() && 
+    if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
         isa<ConstantInt>(TrOp->getOperand(1))) {
       // Okay, we'll do this xform.  Make the shift of shift.
       Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
@@ -378,7 +378,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
       unsigned DstSize = TI->getType()->getScalarSizeInBits();
       APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
-      
+
       // The mask we constructed says what the trunc would do if occurring
       // between the shifts.  We want to know the effect *after* the second
       // shift.  We know that it is a logical shift by a constant, so adjust the
@@ -399,7 +399,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
       return new TruncInst(And, I.getType());
     }
   }
-  
+
   if (Op0->hasOneUse()) {
     if (BinaryOperator *Op0BO = dyn_cast<BinaryOperator>(Op0)) {
       // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
@@ -425,14 +425,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
                      APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
         }
-        
+
         // Turn (Y + ((X >> C) & CC)) << C  ->  ((X & (CC << C)) + (Y << C))
         Value *Op0BOOp1 = Op0BO->getOperand(1);
         if (isLeftShift && Op0BOOp1->hasOneUse() &&
-            match(Op0BOOp1, 
-                  m_And(m_Shr(m_Value(V1), m_Specific(Op1)),
-                        m_ConstantInt(CC))) &&
-            cast<BinaryOperator>(Op0BOOp1)->getOperand(0)->hasOneUse()) {
+            match(Op0BOOp1,
+                  m_And(m_OneUse(m_Shr(m_Value(V1), m_Specific(Op1))),
+                        m_ConstantInt(CC)))) {
           Value *YS =   // (Y << C)
             Builder->CreateShl(Op0BO->getOperand(0), Op1,
                                          Op0BO->getName());
@@ -442,7 +441,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
         }
       }
-        
+
       // FALL THROUGH.
       case Instruction::Sub: {
         // Turn ((X >> C) + Y) << C  ->  (X + (Y << C)) & (~0 << C)
@@ -458,34 +457,32 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
                      APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
         }
-        
+
         // Turn (((X >> C)&CC) + Y) << C  ->  (X + (Y << C)) & (CC << C)
         if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
             match(Op0BO->getOperand(0),
-                  m_And(m_Shr(m_Value(V1), m_Value(V2)),
-                        m_ConstantInt(CC))) && V2 == Op1 &&
-            cast<BinaryOperator>(Op0BO->getOperand(0))
-                ->getOperand(0)->hasOneUse()) {
+                  m_And(m_OneUse(m_Shr(m_Value(V1), m_Value(V2))),
+                        m_ConstantInt(CC))) && V2 == Op1) {
           Value *YS = // (Y << C)
             Builder->CreateShl(Op0BO->getOperand(1), Op1, Op0BO->getName());
           // X & (CC << C)
           Value *XM = Builder->CreateAnd(V1, ConstantExpr::getShl(CC, Op1),
                                          V1->getName()+".mask");
-          
+
           return BinaryOperator::Create(Op0BO->getOpcode(), XM, YS);
         }
-        
+
         break;
       }
       }
-      
-      
+
+
       // If the operand is an bitwise operator with a constant RHS, and the
       // shift is the only use, we can pull it out of the shift.
       if (ConstantInt *Op0C = dyn_cast<ConstantInt>(Op0BO->getOperand(1))) {
         bool isValid = true;     // Valid only for And, Or, Xor
         bool highBitSet = false; // Transform if high bit of constant set?
-        
+
         switch (Op0BO->getOpcode()) {
         default: isValid = false; break;   // Do not perform transform!
         case Instruction::Add:
@@ -499,7 +496,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           highBitSet = true;
           break;
         }
-        
+
         // If this is a signed shift right, and the high bit is modified
         // by the logical operation, do not perform the transformation.
         // The highBitSet boolean indicates the value of the high bit of
@@ -508,26 +505,26 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
         //
         if (isValid && I.getOpcode() == Instruction::AShr)
           isValid = Op0C->getValue()[TypeBits-1] == highBitSet;
-        
+
         if (isValid) {
           Constant *NewRHS = ConstantExpr::get(I.getOpcode(), Op0C, Op1);
-          
+
           Value *NewShift =
             Builder->CreateBinOp(I.getOpcode(), Op0BO->getOperand(0), Op1);
           NewShift->takeName(Op0BO);
-          
+
           return BinaryOperator::Create(Op0BO->getOpcode(), NewShift,
                                         NewRHS);
         }
       }
     }
   }
-  
+
   // Find out if this is a shift of a shift by a constant.
   BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
   if (ShiftOp && !ShiftOp->isShift())
     ShiftOp = 0;
-  
+
   if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
 
     // This is a constant shift of a constant shift. Be careful about hiding
@@ -548,9 +545,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
     assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
     if (ShiftAmt1 == 0) return 0;  // Will be simplified in the future.
     Value *X = ShiftOp->getOperand(0);
-    
+
     IntegerType *Ty = cast<IntegerType>(I.getType());
-    
+
     // Check for (X << c1) << c2  and  (X >> c1) >> c2
     if (I.getOpcode() == ShiftOp->getOpcode()) {
       uint32_t AmtSum = ShiftAmt1+ShiftAmt2;   // Fold into one big shift.
@@ -561,11 +558,11 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
         AmtSum = TypeBits-1;  // Saturate to 31 for i32 ashr.
       }
-      
+
       return BinaryOperator::Create(I.getOpcode(), X,
                                     ConstantInt::get(Ty, AmtSum));
     }
-    
+
     if (ShiftAmt1 == ShiftAmt2) {
       // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
       if (I.getOpcode() == Instruction::LShr &&
@@ -605,7 +602,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           return NewLShr;
         }
         Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);
-        
+
         APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
         return BinaryOperator::CreateAnd(Shift,
                                          ConstantInt::get(I.getContext(),Mask));
@@ -653,12 +650,12 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
           return NewShl;
         }
         Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
-        
+
         APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
         return BinaryOperator::CreateAnd(Shift,
                                          ConstantInt::get(I.getContext(),Mask));
       }
-      
+
       // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
       // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
       if (I.getOpcode() == Instruction::AShr &&
@@ -682,21 +679,21 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
                                  I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
                                  TD))
     return ReplaceInstUsesWith(I, V);
-  
+
   if (Instruction *V = commonShiftTransforms(I))
     return V;
-  
+
   if (ConstantInt *Op1C = dyn_cast<ConstantInt>(I.getOperand(1))) {
     unsigned ShAmt = Op1C->getZExtValue();
-    
+
     // If the shifted-out value is known-zero, then this is a NUW shift.
-    if (!I.hasNoUnsignedWrap() && 
+    if (!I.hasNoUnsignedWrap() &&
         MaskedValueIsZero(I.getOperand(0),
                           APInt::getHighBitsSet(Op1C->getBitWidth(), ShAmt))) {
           I.setHasNoUnsignedWrap();
           return &I;
         }
-    
+
     // If the shifted out value is all signbits, this is a NSW shift.
     if (!I.hasNoSignedWrap() &&
         ComputeNumSignBits(I.getOperand(0)) > ShAmt) {
@@ -712,7 +709,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
       match(I.getOperand(1), m_Constant(C2)))
     return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A);
 
-  return 0;    
+  return 0;
 }
 
 Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
@@ -722,9 +719,9 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
 
   if (Instruction *R = commonShiftTransforms(I))
     return R;
-  
+
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-  
+
   if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
     unsigned ShAmt = Op1C->getZExtValue();
 
@@ -743,15 +740,15 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
         return new ZExtInst(Cmp, II->getType());
       }
     }
-  
+
     // If the shifted-out value is known-zero, then this is an exact shift.
-    if (!I.isExact() && 
+    if (!I.isExact() &&
         MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
       I.setIsExact();
       return &I;
-    }    
+    }
   }
-  
+
   return 0;
 }
 
@@ -762,12 +759,12 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
 
   if (Instruction *R = commonShiftTransforms(I))
     return R;
-  
+
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
     unsigned ShAmt = Op1C->getZExtValue();
-    
+
     // If the input is a SHL by the same constant (ashr (shl X, C), C), then we
     // have a sign-extend idiom.
     Value *X;
@@ -791,23 +788,23 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
     }
 
     // If the shifted-out value is known-zero, then this is an exact shift.
-    if (!I.isExact() && 
+    if (!I.isExact() &&
         MaskedValueIsZero(Op0,APInt::getLowBitsSet(Op1C->getBitWidth(),ShAmt))){
       I.setIsExact();
       return &I;
     }
-  }            
-  
+  }
+
   // See if we can turn a signed shr into an unsigned shr.
   if (MaskedValueIsZero(Op0,
                         APInt::getSignBit(I.getType()->getScalarSizeInBits())))
     return BinaryOperator::CreateLShr(Op0, Op1);
-  
+
   // Arithmetic shifting an all-sign-bit value is a no-op.
   unsigned NumSignBits = ComputeNumSignBits(Op0);
   if (NumSignBits == Op0->getType()->getScalarSizeInBits())
     return ReplaceInstUsesWith(I, Op0);
-  
+
   return 0;
 }
 
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 602b203371..08aedb3200 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -16,9 +16,10 @@
 #include "InstCombine.h"
 #include "llvm/DataLayout.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Support/PatternMatch.h"
 
 using namespace llvm;
-
+using namespace llvm::PatternMatch;
 
 /// ShrinkDemandedConstant - Check to see if the specified operand of the 
 /// specified instruction is a constant integer.  If so, check to see if there
@@ -199,8 +200,21 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == 
           (DemandedMask & (~LHSKnownZero)))
         return I->getOperand(1);
+    } else if (I->getOpcode() == Instruction::Xor) {
+      // We can simplify (X^Y) -> X or Y in the user's context if we know that
+      // only bits from X or Y are demanded.
+      
+      ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+      ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+      
+      // If all of the demanded bits are known zero on one side, return the
+      // other. 
+      if ((DemandedMask & RHSKnownZero) == DemandedMask)
+        return I->getOperand(0);
+      if ((DemandedMask & LHSKnownZero) == DemandedMask)
+        return I->getOperand(1);
     }
-    
+
     // Compute the KnownZero/KnownOne bits to simplify things downstream.
     ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
     return 0;
@@ -580,6 +594,17 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     break;
   case Instruction::Shl:
     if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      {
+        Value *VarX; ConstantInt *C1;
+        if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) {
+          Instruction *Shr = cast<Instruction>(I->getOperand(0));
+          Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask,
+                                                KnownZero, KnownOne);
+          if (R)
+            return R;
+        }
+      }
+
       uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
       APInt DemandedMaskIn(DemandedMask.lshr(ShiftAmt));
       
@@ -800,6 +825,79 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
   return 0;
 }
 
+/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
+/// "E1 = (X lsr C1) << C2", where the C1 and C2 are constant, into
+/// "E2 = X << (C2 - C1)" or "E2 = X >> (C1 - C2)", depending on the sign
+/// of "C2-C1".
+///
+/// Suppose E1 and E2 are generally different in bits S={bm, bm+1,
+/// ..., bn}, without considering the specific value X is holding.
+/// This transformation is legal iff one of following conditions is hold:
+///  1) All the bit in S are 0, in this case E1 == E2.
+///  2) We don't care those bits in S, per the input DemandedMask.
+///  3) Combination of 1) and 2). Some bits in S are 0, and we don't care the
+///     rest bits.
+///
+/// Currently we only test condition 2).
+///
+/// As with SimplifyDemandedUseBits, it returns NULL if the simplification was
+/// not successful.
+Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
+  Instruction *Shl, APInt DemandedMask, APInt &KnownZero, APInt &KnownOne) {
+
+  unsigned ShlAmt = cast<ConstantInt>(Shl->getOperand(1))->getZExtValue();
+  unsigned ShrAmt = cast<ConstantInt>(Shr->getOperand(1))->getZExtValue();
+
+  KnownOne.clearAllBits();
+  KnownZero = APInt::getBitsSet(KnownZero.getBitWidth(), 0, ShlAmt-1);
+  KnownZero &= DemandedMask;
+
+  if (ShlAmt == 0 || ShrAmt == 0)
+    return 0;
+
+  Value *VarX = Shr->getOperand(0);
+  Type *Ty = VarX->getType();
+
+  APInt BitMask1(Ty->getIntegerBitWidth(), (uint64_t)-1);
+  APInt BitMask2(Ty->getIntegerBitWidth(), (uint64_t)-1);
+
+  bool isLshr = (Shr->getOpcode() == Instruction::LShr);
+  BitMask1 = isLshr ? (BitMask1.lshr(ShrAmt) << ShlAmt) :
+                      (BitMask1.ashr(ShrAmt) << ShlAmt);
+
+  if (ShrAmt <= ShlAmt) {
+    BitMask2 <<= (ShlAmt - ShrAmt);
+  } else {
+    BitMask2 = isLshr ? BitMask2.lshr(ShrAmt - ShlAmt):
+                        BitMask2.ashr(ShrAmt - ShlAmt);
+  }
+
+  // Check if condition-2 (see the comment to this function) is satified.
+  if ((BitMask1 & DemandedMask) == (BitMask2 & DemandedMask)) {
+    if (ShrAmt == ShlAmt)
+      return VarX;
+
+    if (!Shr->hasOneUse())
+      return 0;
+
+    BinaryOperator *New;
+    if (ShrAmt < ShlAmt) {
+      Constant *Amt = ConstantInt::get(VarX->getType(), ShlAmt - ShrAmt);
+      New = BinaryOperator::CreateShl(VarX, Amt);
+      BinaryOperator *Orig = cast<BinaryOperator>(Shl);
+      New->setHasNoSignedWrap(Orig->hasNoSignedWrap());
+      New->setHasNoUnsignedWrap(Orig->hasNoUnsignedWrap());
+    } else {
+      Constant *Amt = ConstantInt::get(VarX->getType(), ShrAmt - ShlAmt);
+      New = isLshr ? BinaryOperator::CreateLShr(VarX, Amt) :
+                     BinaryOperator::CreateAShr(VarX, Amt);
+    }
+
+    return InsertNewInstWith(New, *Shl);
+  }
+
+  return 0;
+}
 
 /// SimplifyDemandedVectorElts - The specified value produces a vector with
 /// any number of elements. DemandedElts contains the set of elements that are
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index ea654ae9ed..b1a4966920 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -11,11 +11,11 @@
 #define INSTCOMBINE_WORKLIST_H
 
 #define DEBUG_TYPE "instcombine"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Instruction.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index feef2ccee4..9da58d0e71 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -36,23 +36,23 @@
 #define DEBUG_TYPE "instcombine"
 #include "llvm/Transforms/Scalar.h"
 #include "InstCombine.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm-c/Initialization.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm-c/Initialization.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <climits>
 using namespace llvm;
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 4e05c3200c..f095cff33c 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -15,14 +15,8 @@
 
 #define DEBUG_TYPE "asan"
 
+#include "llvm/Transforms/Instrumentation.h"
 #include "BlackList.h"
-#include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/InlineAsm.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallSet.h"
@@ -30,19 +24,24 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
-
-#include <string>
+#include "llvm/Type.h"
 #include <algorithm>
+#include <string>
 
 using namespace llvm;
 
@@ -70,6 +69,9 @@ static const char *kAsanMappingScaleName = "__asan_mapping_scale";
 static const char *kAsanStackMallocName = "__asan_stack_malloc";
 static const char *kAsanStackFreeName = "__asan_stack_free";
 static const char *kAsanGenPrefix = "__asan_gen_";
+static const char *kAsanPoisonStackMemoryName = "__asan_poison_stack_memory";
+static const char *kAsanUnpoisonStackMemoryName =
+    "__asan_unpoison_stack_memory";
 
 static const int kAsanStackLeftRedzoneMagic = 0xf1;
 static const int kAsanStackMidRedzoneMagic = 0xf2;
@@ -113,9 +115,10 @@ static cl::opt<bool> ClInitializers("asan-initialization-order",
        cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
 static cl::opt<bool> ClMemIntrin("asan-memintrin",
        cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
-// This flag may need to be replaced with -fasan-blacklist.
-static cl::opt<std::string>  ClBlackListFile("asan-blacklist",
-       cl::desc("File containing the list of functions to ignore "
+static cl::opt<bool> ClRealignStack("asan-realign-stack",
+       cl::desc("Realign stack to 32"), cl::Hidden, cl::init(true));
+static cl::opt<std::string> ClBlacklistFile("asan-blacklist",
+       cl::desc("File containing the list of objects to ignore "
                 "during instrumentation"), cl::Hidden);
 
 // These flags allow to change the shadow mapping.
@@ -136,6 +139,10 @@ static cl::opt<bool> ClOptSameTemp("asan-opt-same-temp",
 static cl::opt<bool> ClOptGlobals("asan-opt-globals",
        cl::desc("Don't instrument scalar globals"), cl::Hidden, cl::init(true));
 
+static cl::opt<bool> ClCheckLifetime("asan-check-lifetime",
+       cl::desc("Use llvm.lifetime intrinsics to insert extra checks"),
+       cl::Hidden, cl::init(false));
+
 // Debug flags.
 static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
                             cl::init(0));
@@ -186,8 +193,19 @@ static size_t RedzoneSize() {
 
 /// AddressSanitizer: instrument the code in module to find memory bugs.
 struct AddressSanitizer : public FunctionPass {
-  AddressSanitizer();
-  virtual const char *getPassName() const;
+  AddressSanitizer(bool CheckInitOrder = false,
+                   bool CheckUseAfterReturn = false,
+                   bool CheckLifetime = false,
+                   StringRef BlacklistFile = StringRef())
+      : FunctionPass(ID),
+        CheckInitOrder(CheckInitOrder || ClInitializers),
+        CheckUseAfterReturn(CheckUseAfterReturn || ClUseAfterReturn),
+        CheckLifetime(CheckLifetime || ClCheckLifetime),
+        BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
+                                            : BlacklistFile) {}
+  virtual const char *getPassName() const {
+    return "AddressSanitizerFunctionPass";
+  }
   void instrumentMop(Instruction *I);
   void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB,
                          Value *Addr, uint32_t TypeSize, bool IsWrite);
@@ -206,10 +224,10 @@ struct AddressSanitizer : public FunctionPass {
   bool maybeInsertAsanInitAtFunctionEntry(Function &F);
   bool poisonStackInFunction(Function &F);
   virtual bool doInitialization(Module &M);
-  virtual bool doFinalization(Module &M);
   static char ID;  // Pass identification, replacement for typeid
 
  private:
+  void initializeCallbacks(Module &M);
   uint64_t getAllocaSizeInBytes(AllocaInst *AI) {
     Type *Ty = AI->getAllocatedType();
     uint64_t SizeInBytes = TD->getTypeAllocSize(Ty);
@@ -229,7 +247,18 @@ struct AddressSanitizer : public FunctionPass {
                    Value *ShadowBase, bool DoPoison);
   bool LooksLikeCodeInBug11395(Instruction *I);
   void FindDynamicInitializers(Module &M);
-
+  /// Analyze lifetime intrinsics for given alloca. Use Value* instead of
+  /// AllocaInst* here, as we call this method after we merge all allocas into a
+  /// single one. Returns true if ASan added some instrumentation.
+  bool handleAllocaLifetime(Value *Alloca);
+  /// Analyze lifetime intrinsics for a specific value, casted from alloca.
+  /// Returns true if if ASan added some instrumentation.
+  bool handleValueLifetime(Value *V);
+  void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> IRB, bool DoPoison);
+
+  bool CheckInitOrder;
+  bool CheckUseAfterReturn;
+  bool CheckLifetime;
   LLVMContext *C;
   DataLayout *TD;
   uint64_t MappingOffset;
@@ -239,7 +268,9 @@ struct AddressSanitizer : public FunctionPass {
   Function *AsanCtorFunction;
   Function *AsanInitFunction;
   Function *AsanStackMallocFunc, *AsanStackFreeFunc;
+  Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
   Function *AsanHandleNoReturnFunc;
+  SmallString<64> BlacklistFile;
   OwningPtr<BlackList> BL;
   // This array is indexed by AccessIsWrite and log2(AccessSize).
   Function *AsanErrorCallback[2][kNumberOfAccessSizes];
@@ -247,19 +278,32 @@ struct AddressSanitizer : public FunctionPass {
   SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
 };
 
-// FIXME: inherit this from ModulePass and actually use it as a ModulePass.
-class AddressSanitizerCreateGlobalRedzonesPass {
+class AddressSanitizerModule : public ModulePass {
  public:
-  bool runOnModule(Module &M, DataLayout *TD);
+  AddressSanitizerModule(bool CheckInitOrder = false,
+                         StringRef BlacklistFile = StringRef())
+      : ModulePass(ID),
+        CheckInitOrder(CheckInitOrder || ClInitializers),
+        BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
+                                            : BlacklistFile) {}
+  bool runOnModule(Module &M);
+  static char ID;  // Pass identification, replacement for typeid
+  virtual const char *getPassName() const {
+    return "AddressSanitizerModule";
+  }
+
  private:
   bool ShouldInstrumentGlobal(GlobalVariable *G);
   void createInitializerPoisonCalls(Module &M, Value *FirstAddr,
                                     Value *LastAddr);
 
+  bool CheckInitOrder;
+  SmallString<64> BlacklistFile;
   OwningPtr<BlackList> BL;
   SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
   Type *IntptrTy;
   LLVMContext *C;
+  DataLayout *TD;
 };
 
 }  // namespace
@@ -268,13 +312,20 @@ char AddressSanitizer::ID = 0;
 INITIALIZE_PASS(AddressSanitizer, "asan",
     "AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
     false, false)
-AddressSanitizer::AddressSanitizer() : FunctionPass(ID) { }
-FunctionPass *llvm::createAddressSanitizerPass() {
-  return new AddressSanitizer();
+FunctionPass *llvm::createAddressSanitizerFunctionPass(
+    bool CheckInitOrder, bool CheckUseAfterReturn, bool CheckLifetime,
+    StringRef BlacklistFile) {
+  return new AddressSanitizer(CheckInitOrder, CheckUseAfterReturn,
+                              CheckLifetime, BlacklistFile);
 }
 
-const char *AddressSanitizer::getPassName() const {
-  return "AddressSanitizer";
+char AddressSanitizerModule::ID = 0;
+INITIALIZE_PASS(AddressSanitizerModule, "asan-module",
+    "AddressSanitizer: detects use-after-free and out-of-bounds bugs."
+    "ModulePass", false, false)
+ModulePass *llvm::createAddressSanitizerModulePass(
+    bool CheckInitOrder, StringRef BlacklistFile) {
+  return new AddressSanitizerModule(CheckInitOrder, BlacklistFile);
 }
 
 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
@@ -385,7 +436,7 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
     if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
       // If initialization order checking is disabled, a simple access to a
       // dynamically initialized global is always valid.
-      if (!ClInitializers)
+      if (!CheckInitOrder)
         return;
       // If a global variable does not have dynamic initialization we don't
       // have to instrument it.  However, if a global does not have initailizer
@@ -492,7 +543,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
   Crash->setDebugLoc(OrigIns->getDebugLoc());
 }
 
-void AddressSanitizerCreateGlobalRedzonesPass::createInitializerPoisonCalls(
+void AddressSanitizerModule::createInitializerPoisonCalls(
     Module &M, Value *FirstAddr, Value *LastAddr) {
   // We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
   Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
@@ -524,8 +575,7 @@ void AddressSanitizerCreateGlobalRedzonesPass::createInitializerPoisonCalls(
   }
 }
 
-bool AddressSanitizerCreateGlobalRedzonesPass::ShouldInstrumentGlobal(
-    GlobalVariable *G) {
+bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
   Type *Ty = cast<PointerType>(G->getType())->getElementType();
   DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
 
@@ -587,9 +637,13 @@ bool AddressSanitizerCreateGlobalRedzonesPass::ShouldInstrumentGlobal(
 // This function replaces all global variables with new variables that have
 // trailing redzones. It also creates a function that poisons
 // redzones and inserts this function into llvm.global_ctors.
-bool AddressSanitizerCreateGlobalRedzonesPass::runOnModule(Module &M,
-                                                           DataLayout *TD) {
-  BL.reset(new BlackList(ClBlackListFile));
+bool AddressSanitizerModule::runOnModule(Module &M) {
+  if (!ClGlobals) return false;
+  TD = getAnalysisIfAvailable<DataLayout>();
+  if (!TD)
+    return false;
+  BL.reset(new BlackList(BlacklistFile));
+  if (BL->isIn(M)) return false;
   DynamicallyInitializedGlobals.Init(M);
   C = &(M.getContext());
   IntptrTy = Type::getIntNTy(*C, TD->getPointerSizeInBits());
@@ -677,7 +731,7 @@ bool AddressSanitizerCreateGlobalRedzonesPass::runOnModule(Module &M,
         NULL);
 
     // Populate the first and last globals declared in this TU.
-    if (ClInitializers && GlobalHasDynamicInitializer) {
+    if (CheckInitOrder && GlobalHasDynamicInitializer) {
       LastDynamic = ConstantExpr::getPointerCast(NewGlobal, IntptrTy);
       if (FirstDynamic == 0)
         FirstDynamic = LastDynamic;
@@ -692,7 +746,7 @@ bool AddressSanitizerCreateGlobalRedzonesPass::runOnModule(Module &M,
       ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
 
   // Create calls for poisoning before initializers run and unpoisoning after.
-  if (ClInitializers && FirstDynamic && LastDynamic)
+  if (CheckInitOrder && FirstDynamic && LastDynamic)
     createInitializerPoisonCalls(M, FirstDynamic, LastDynamic);
 
   Function *AsanRegisterGlobals = checkInterfaceFunction(M.getOrInsertFunction(
@@ -726,32 +780,8 @@ bool AddressSanitizerCreateGlobalRedzonesPass::runOnModule(Module &M,
   return true;
 }
 
-// virtual
-bool AddressSanitizer::doInitialization(Module &M) {
-  // Initialize the private fields. No one has accessed them before.
-  TD = getAnalysisIfAvailable<DataLayout>();
-
-  if (!TD)
-    return false;
-  BL.reset(new BlackList(ClBlackListFile));
-  DynamicallyInitializedGlobals.Init(M);
-
-  C = &(M.getContext());
-  LongSize = TD->getPointerSizeInBits();
-  IntptrTy = Type::getIntNTy(*C, LongSize);
-  IntptrPtrTy = PointerType::get(IntptrTy, 0);
-
-  AsanCtorFunction = Function::Create(
-      FunctionType::get(Type::getVoidTy(*C), false),
-      GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
-  BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
-  // call __asan_init in the module ctor.
-  IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB));
-  AsanInitFunction = checkInterfaceFunction(
-      M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL));
-  AsanInitFunction->setLinkage(Function::ExternalLinkage);
-  IRB.CreateCall(AsanInitFunction);
-
+void AddressSanitizer::initializeCallbacks(Module &M) {
+  IRBuilder<> IRB(*C);
   // Create __asan_report* callbacks.
   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
     for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
@@ -773,11 +803,42 @@ bool AddressSanitizer::doInitialization(Module &M) {
       IntptrTy, IntptrTy, IntptrTy, NULL));
   AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
       kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
+  AsanPoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
+      kAsanPoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+  AsanUnpoisonStackMemoryFunc = checkInterfaceFunction(M.getOrInsertFunction(
+      kAsanUnpoisonStackMemoryName, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
 
   // We insert an empty inline asm after __asan_report* to avoid callback merge.
   EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
                             StringRef(""), StringRef(""),
                             /*hasSideEffects=*/true);
+}
+
+// virtual
+bool AddressSanitizer::doInitialization(Module &M) {
+  // Initialize the private fields. No one has accessed them before.
+  TD = getAnalysisIfAvailable<DataLayout>();
+
+  if (!TD)
+    return false;
+  BL.reset(new BlackList(BlacklistFile));
+  DynamicallyInitializedGlobals.Init(M);
+
+  C = &(M.getContext());
+  LongSize = TD->getPointerSizeInBits();
+  IntptrTy = Type::getIntNTy(*C, LongSize);
+  IntptrPtrTy = PointerType::get(IntptrTy, 0);
+
+  AsanCtorFunction = Function::Create(
+      FunctionType::get(Type::getVoidTy(*C), false),
+      GlobalValue::InternalLinkage, kAsanModuleCtorName, &M);
+  BasicBlock *AsanCtorBB = BasicBlock::Create(*C, "", AsanCtorFunction);
+  // call __asan_init in the module ctor.
+  IRBuilder<> IRB(ReturnInst::Create(*C, AsanCtorBB));
+  AsanInitFunction = checkInterfaceFunction(
+      M.getOrInsertFunction(kAsanInitName, IRB.getVoidTy(), NULL));
+  AsanInitFunction->setLinkage(Function::ExternalLinkage);
+  IRB.CreateCall(AsanInitFunction);
 
   llvm::Triple targetTriple(M.getTargetTriple());
   bool isAndroid = targetTriple.getEnvironment() == llvm::Triple::Android;
@@ -817,18 +878,6 @@ bool AddressSanitizer::doInitialization(Module &M) {
   return true;
 }
 
-bool AddressSanitizer::doFinalization(Module &M) {
-  // We transform the globals at the very end so that the optimization analysis
-  // works on the original globals.
-  if (ClGlobals) {
-    // FIXME: instead of doFinalization, run this as a true ModulePass.
-    AddressSanitizerCreateGlobalRedzonesPass Pass;
-    return Pass.runOnModule(M, TD);
-  }
-  return false;
-}
-
-
 bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
   // For each NSObject descendant having a +load method, this method is invoked
   // by the ObjC runtime before any of the static constructors is called.
@@ -849,6 +898,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
   if (BL->isIn(F)) return false;
   if (&F == AsanCtorFunction) return false;
   DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
+  initializeCallbacks(*F.getParent());
 
   // If needed, insert __asan_init before checking for AddressSafety attr.
   maybeInsertAsanInitAtFunctionEntry(F);
@@ -1021,6 +1071,74 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
   return true;
 }
 
+// Handling llvm.lifetime intrinsics for a given %alloca:
+// (1) collect all llvm.lifetime.xxx(%size, %value) describing the alloca.
+// (2) if %size is constant, poison memory for llvm.lifetime.end (to detect
+//     invalid accesses) and unpoison it for llvm.lifetime.start (the memory
+//     could be poisoned by previous llvm.lifetime.end instruction, as the
+//     variable may go in and out of scope several times, e.g. in loops).
+// (3) if we poisoned at least one %alloca in a function,
+//     unpoison the whole stack frame at function exit.
+bool AddressSanitizer::handleAllocaLifetime(Value *Alloca) {
+  assert(CheckLifetime);
+  Type *AllocaType = Alloca->getType();
+  Type *Int8PtrTy = Type::getInt8PtrTy(AllocaType->getContext());
+
+  bool Res = false;
+  // Typical code looks like this:
+  // %alloca = alloca <type>, <alignment>
+  // ... some code ...
+  // %val1 = bitcast <type>* %alloca to i8*
+  // call void @llvm.lifetime.start(i64 <size>, i8* %val1)
+  // ... more code ...
+  // %val2 = bitcast <type>* %alloca to i8*
+  // call void @llvm.lifetime.start(i64 <size>, i8* %val2)
+  // That is, to handle %alloca we must find all its casts to
+  // i8* values, and find lifetime instructions for these values.
+  if (AllocaType == Int8PtrTy)
+    Res |= handleValueLifetime(Alloca);
+  for (Value::use_iterator UI = Alloca->use_begin(), UE = Alloca->use_end();
+       UI != UE; ++UI) {
+    if (UI->getType() != Int8PtrTy) continue;
+    if (UI->stripPointerCasts() != Alloca) continue;
+    Res |= handleValueLifetime(*UI);
+  }
+  return Res;
+}
+
+bool AddressSanitizer::handleValueLifetime(Value *V) {
+  assert(CheckLifetime);
+  bool Res = false;
+  for (Value::use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE;
+       ++UI) {
+    IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
+    if (!II) continue;
+    Intrinsic::ID ID = II->getIntrinsicID();
+    if (ID != Intrinsic::lifetime_start &&
+        ID != Intrinsic::lifetime_end)
+      continue;
+    if (V != II->getArgOperand(1))
+      continue;
+    // Found lifetime intrinsic, add ASan instrumentation if necessary.
+    ConstantInt *Size = dyn_cast<ConstantInt>(II->getArgOperand(0));
+    // If size argument is undefined, don't do anything.
+    if (Size->isMinusOne())
+      continue;
+    // Check that size doesn't saturate uint64_t and can
+    // be stored in IntptrTy.
+    const uint64_t SizeValue = Size->getValue().getLimitedValue();
+    if (SizeValue == ~0ULL ||
+        !ConstantInt::isValueValidForType(IntptrTy, SizeValue)) {
+      continue;
+    }
+    IRBuilder<> IRB(II);
+    bool DoPoison = (ID == Intrinsic::lifetime_end);
+    poisonAlloca(V, SizeValue, IRB, DoPoison);
+    Res = true;
+  }
+  return Res;
+}
+
 // Find all static Alloca instructions and put
 // poisoned red zones around all of them.
 // Then unpoison everything back before the function returns.
@@ -1039,9 +1157,11 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
   SmallVector<AllocaInst*, 16> AllocaVec;
   SmallVector<Instruction*, 8> RetVec;
   uint64_t TotalSize = 0;
+  bool HavePoisonedAllocas = false;
 
   // Filter out Alloca instructions we want (and can) handle.
   // Collect Ret instructions.
+  unsigned ResultAlignment = 1 << MappingScale();
   for (Function::iterator FI = F.begin(), FE = F.end();
        FI != FE; ++FI) {
     BasicBlock &BB = *FI;
@@ -1057,7 +1177,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
       if (AI->isArrayAllocation()) continue;
       if (!AI->isStaticAlloca()) continue;
       if (!AI->getAllocatedType()->isSized()) continue;
-      if (AI->getAlignment() > RedzoneSize()) continue;
+      ResultAlignment = std::max(ResultAlignment, AI->getAlignment());
       AllocaVec.push_back(AI);
       uint64_t AlignedSize =  getAlignedAllocaSize(AI);
       TotalSize += AlignedSize;
@@ -1068,7 +1188,7 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
 
   uint64_t LocalStackSize = TotalSize + (AllocaVec.size() + 1) * RedzoneSize();
 
-  bool DoStackMalloc = ClUseAfterReturn
+  bool DoStackMalloc = CheckUseAfterReturn
       && LocalStackSize <= kMaxStackMallocSize;
 
   Instruction *InsBefore = AllocaVec[0];
@@ -1078,7 +1198,9 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
   Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize);
   AllocaInst *MyAlloca =
       new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore);
-  MyAlloca->setAlignment(RedzoneSize());
+  if (ClRealignStack && ResultAlignment < RedzoneSize())
+    ResultAlignment = RedzoneSize();
+  MyAlloca->setAlignment(ResultAlignment);
   assert(MyAlloca->isStaticAlloca());
   Value *OrigStackBase = IRB.CreatePointerCast(MyAlloca, IntptrTy);
   Value *LocalStackBase = OrigStackBase;
@@ -1103,10 +1225,13 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
                      << Name.size() << " " << Name << " ";
     uint64_t AlignedSize = getAlignedAllocaSize(AI);
     assert((AlignedSize % RedzoneSize()) == 0);
-    AI->replaceAllUsesWith(
-        IRB.CreateIntToPtr(
+    Value *NewAllocaPtr = IRB.CreateIntToPtr(
             IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Pos)),
-            AI->getType()));
+            AI->getType());
+    AI->replaceAllUsesWith(NewAllocaPtr);
+    // Analyze lifetime intrinsics only for static allocas we handle.
+    if (CheckLifetime)
+      HavePoisonedAllocas |= handleAllocaLifetime(NewAllocaPtr);
     Pos += AlignedSize + RedzoneSize();
   }
   assert(Pos == LocalStackSize);
@@ -1139,9 +1264,15 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
     PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRBRet, ShadowBase, false);
 
     if (DoStackMalloc) {
+      // In use-after-return mode, mark the whole stack frame unaddressable.
       IRBRet.CreateCall3(AsanStackFreeFunc, LocalStackBase,
                          ConstantInt::get(IntptrTy, LocalStackSize),
                          OrigStackBase);
+    } else if (HavePoisonedAllocas) {
+      // If we poisoned some allocas in llvm.lifetime analysis,
+      // unpoison whole stack frame now.
+      assert(LocalStackBase == OrigStackBase);
+      poisonAlloca(LocalStackBase, LocalStackSize, IRBRet, false);
     }
   }
 
@@ -1155,3 +1286,13 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
 
   return true;
 }
+
+void AddressSanitizer::poisonAlloca(Value *V, uint64_t Size, IRBuilder<> IRB,
+                                    bool DoPoison) {
+  // For now just insert the call to ASan runtime.
+  Value *AddrArg = IRB.CreatePointerCast(V, IntptrTy);
+  Value *SizeArg = ConstantInt::get(IntptrTy, Size);
+  IRB.CreateCall2(DoPoison ? AsanPoisonStackMemoryFunc
+                           : AsanUnpoisonStackMemoryFunc,
+                  AddrArg, SizeArg);
+}
diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp
index e02c631f7f..0bfb186562 100644
--- a/lib/Transforms/Instrumentation/BlackList.cpp
+++ b/lib/Transforms/Instrumentation/BlackList.cpp
@@ -13,9 +13,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <utility>
-#include <string>
-
 #include "BlackList.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/SmallVector.h"
@@ -28,12 +25,14 @@
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
+#include <string>
+#include <utility>
 
 namespace llvm {
 
 BlackList::BlackList(const StringRef Path) {
   // Validate and open blacklist file.
-  if (!Path.size()) return;
+  if (Path.empty()) return;
   OwningPtr<MemoryBuffer> File;
   if (error_code EC = MemoryBuffer::getFile(Path, File)) {
     report_fatal_error("Can't open blacklist file: " + Path + ": " +
@@ -53,6 +52,10 @@ BlackList::BlackList(const StringRef Path) {
     std::pair<StringRef, StringRef> SplitLine = I->split(":");
     StringRef Prefix = SplitLine.first;
     std::string Regexp = SplitLine.second;
+    if (Regexp.empty()) {
+      // Missing ':' in the line.
+      report_fatal_error("malformed blacklist line: " + SplitLine.first);
+    }
 
     // Replace * with .*
     for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
@@ -69,7 +72,7 @@ BlackList::BlackList(const StringRef Path) {
     }
 
     // Add this regexp into the proper group by its prefix.
-    if (Regexps[Prefix].size())
+    if (!Regexps[Prefix].empty())
       Regexps[Prefix] += "|";
     Regexps[Prefix] += Regexp;
   }
@@ -110,10 +113,12 @@ bool BlackList::isInInit(const GlobalVariable &G) {
           inSection("global-init-type", GetGVTypeString(G)));
 }
 
-bool BlackList::inSection(const StringRef Section,
-                          const StringRef Query) {
-  Regex *FunctionRegex = Entries[Section];
-  return FunctionRegex ? FunctionRegex->match(Query) : false;
+bool BlackList::inSection(const StringRef Section, const StringRef Query) {
+  StringMap<Regex*>::iterator I = Entries.find(Section);
+  if (I == Entries.end()) return false;
+
+  Regex *FunctionRegex = I->getValue();
+  return FunctionRegex->match(Query);
 }
 
 }  // namespace llvm
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 2d5d5603c0..303e04ac16 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -13,19 +13,19 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "bounds-checking"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/DataLayout.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/TargetFolder.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Instrumentation.h"
 using namespace llvm;
 
 static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
@@ -108,6 +108,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
     else
       Cmp = 0; // unconditional branch
   }
+  ++ChecksAdded;
 
   Instruction *Inst = Builder->GetInsertPoint();
   BasicBlock *OldBB = Inst->getParent();
@@ -162,7 +163,6 @@ bool BoundsChecking::instrument(Value *Ptr, Value *InstVal) {
   }
   emitBranchToTrap(Or);
 
-  ++ChecksAdded;
   return true;
 }
 
diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt
index 058f68c7ce..1c9e053679 100644
--- a/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_library(LLVMInstrumentation
   BoundsChecking.cpp
   EdgeProfiling.cpp
   GCOVProfiling.cpp
+  MemorySanitizer.cpp
   Instrumentation.cpp
   OptimalEdgeProfiling.cpp
   PathProfiling.cpp
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index e8ef2654d2..41e42aff49 100644
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -18,13 +18,13 @@
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "insert-edge-profiling"
 
+#include "llvm/Transforms/Instrumentation.h"
 #include "ProfilingUtils.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/ADT/Statistic.h"
 #include <set>
 using namespace llvm;
 
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index a8adaa62d7..5e064cd70d 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -16,19 +16,19 @@
 
 #define DEBUG_TYPE "insert-gcov-profiling"
 
-#include "ProfilingUtils.h"
 #include "llvm/Transforms/Instrumentation.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "ProfilingUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/UniqueVector.h"
+#include "llvm/DebugInfo.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/DebugLoc.h"
 #include "llvm/Support/InstIterator.h"
@@ -45,11 +45,11 @@ namespace {
     static char ID;
     GCOVProfiler()
         : ModulePass(ID), EmitNotes(true), EmitData(true), Use402Format(false),
-          UseExtraChecksum(false) {
+          UseExtraChecksum(false), NoRedZone(false) {
       initializeGCOVProfilerPass(*PassRegistry::getPassRegistry());
     }
     GCOVProfiler(bool EmitNotes, bool EmitData, bool use402Format = false,
-                 bool useExtraChecksum = false)
+                 bool useExtraChecksum = false, bool NoRedZone = false)
         : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData),
           Use402Format(use402Format), UseExtraChecksum(useExtraChecksum) {
       assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?");
@@ -98,6 +98,7 @@ namespace {
     bool EmitData;
     bool Use402Format;
     bool UseExtraChecksum;
+    bool NoRedZone;
 
     Module *M;
     LLVMContext *Ctx;
@@ -110,8 +111,10 @@ INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling",
 
 ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData,
                                          bool Use402Format,
-                                         bool UseExtraChecksum) {
-  return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum);
+                                         bool UseExtraChecksum,
+                                         bool NoRedZone) {
+  return new GCOVProfiler(EmitNotes, EmitData, Use402Format, UseExtraChecksum,
+                          NoRedZone);
 }
 
 namespace {
@@ -638,6 +641,9 @@ void GCOVProfiler::insertCounterWriteout(
     WriteoutF = Function::Create(WriteoutFTy, GlobalValue::InternalLinkage,
                                  "__llvm_gcov_writeout", M);
   WriteoutF->setUnnamedAddr(true);
+  WriteoutF->addFnAttr(Attributes::NoInline);
+  if (NoRedZone)
+    WriteoutF->addFnAttr(Attributes::NoRedZone);
 
   BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", WriteoutF);
   IRBuilder<> Builder(BB);
@@ -683,6 +689,8 @@ void GCOVProfiler::insertCounterWriteout(
   F->setUnnamedAddr(true);
   F->setLinkage(GlobalValue::InternalLinkage);
   F->addFnAttr(Attributes::NoInline);
+  if (NoRedZone)
+    F->addFnAttr(Attributes::NoRedZone);
 
   BB = BasicBlock::Create(*Ctx, "entry", F);
   Builder.SetInsertPoint(BB);
@@ -702,6 +710,8 @@ void GCOVProfiler::insertIndirectCounterIncrement() {
   Fn->setUnnamedAddr(true);
   Fn->setLinkage(GlobalValue::InternalLinkage);
   Fn->addFnAttr(Attributes::NoInline);
+  if (NoRedZone)
+    Fn->addFnAttr(Attributes::NoRedZone);
 
   Type *Int32Ty = Type::getInt32Ty(*Ctx);
   Type *Int64Ty = Type::getInt64Ty(*Ctx);
@@ -758,6 +768,9 @@ insertFlush(ArrayRef<std::pair<GlobalVariable*, MDNode*> > CountersBySP) {
   else
     FlushF->setLinkage(GlobalValue::InternalLinkage);
   FlushF->setUnnamedAddr(true);
+  FlushF->addFnAttr(Attributes::NoInline);
+  if (NoRedZone)
+    FlushF->addFnAttr(Attributes::NoRedZone);
 
   BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF);
 
diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp
index 1e0b4a348a..8ba102559b 100644
--- a/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -21,11 +21,13 @@ using namespace llvm;
 /// library.
 void llvm::initializeInstrumentation(PassRegistry &Registry) {
   initializeAddressSanitizerPass(Registry);
+  initializeAddressSanitizerModulePass(Registry);
   initializeBoundsCheckingPass(Registry);
   initializeEdgeProfilerPass(Registry);
   initializeGCOVProfilerPass(Registry);
   initializeOptimalEdgeProfilerPass(Registry);
   initializePathProfilerPass(Registry);
+  initializeMemorySanitizerPass(Registry);
   initializeThreadSanitizerPass(Registry);
 }
 
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index a4bb5a66af..50226db8c2 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -15,10 +15,10 @@
 #ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
 #define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
 
-#include "llvm/BasicBlock.h"
 #include "llvm/ADT/EquivalenceClasses.h"
-#include <vector>
+#include "llvm/BasicBlock.h"
 #include <algorithm>
+#include <vector>
 
 namespace llvm {
 
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
new file mode 100644
index 0000000000..947a2e3b12
--- /dev/null
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -0,0 +1,1579 @@
+//===-- MemorySanitizer.cpp - detector of uninitialized reads -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file is a part of MemorySanitizer, a detector of uninitialized
+/// reads.
+///
+/// Status: early prototype.
+///
+/// The algorithm of the tool is similar to Memcheck
+/// (http://goo.gl/QKbem). We associate a few shadow bits with every
+/// byte of the application memory, poison the shadow of the malloc-ed
+/// or alloca-ed memory, load the shadow bits on every memory read,
+/// propagate the shadow bits through some of the arithmetic
+/// instruction (including MOV), store the shadow bits on every memory
+/// write, report a bug on some other instructions (e.g. JMP) if the
+/// associated shadow is poisoned.
+///
+/// But there are differences too. The first and the major one:
+/// compiler instrumentation instead of binary instrumentation. This
+/// gives us much better register allocation, possible compiler
+/// optimizations and a fast start-up. But this brings the major issue
+/// as well: msan needs to see all program events, including system
+/// calls and reads/writes in system libraries, so we either need to
+/// compile *everything* with msan or use a binary translation
+/// component (e.g. DynamoRIO) to instrument pre-built libraries.
+/// Another difference from Memcheck is that we use 8 shadow bits per
+/// byte of application memory and use a direct shadow mapping. This
+/// greatly simplifies the instrumentation code and avoids races on
+/// shadow updates (Memcheck is single-threaded so races are not a
+/// concern there. Memcheck uses 2 shadow bits per byte with a slow
+/// path storage that uses 8 bits per byte).
+///
+/// The default value of shadow is 0, which means "clean" (not poisoned).
+///
+/// Every module initializer should call __msan_init to ensure that the
+/// shadow memory is ready. On error, __msan_warning is called. Since
+/// parameters and return values may be passed via registers, we have a
+/// specialized thread-local shadow for return values
+/// (__msan_retval_tls) and parameters (__msan_param_tls).
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "msan"
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "BlackList.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/InstVisitor.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/MDBuilder.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Type.h"
+
+using namespace llvm;
+
+static const uint64_t kShadowMask32 = 1ULL << 31;
+static const uint64_t kShadowMask64 = 1ULL << 46;
+static const uint64_t kOriginOffset32 = 1ULL << 30;
+static const uint64_t kOriginOffset64 = 1ULL << 45;
+
+// This is an important flag that makes the reports much more
+// informative at the cost of greater slowdown. Not fully implemented
+// yet.
+// FIXME: this should be a top-level clang flag, e.g.
+// -fmemory-sanitizer-full.
+static cl::opt<bool> ClTrackOrigins("msan-track-origins",
+       cl::desc("Track origins (allocation sites) of poisoned memory"),
+       cl::Hidden, cl::init(false));
+static cl::opt<bool> ClKeepGoing("msan-keep-going",
+       cl::desc("keep going after reporting a UMR"),
+       cl::Hidden, cl::init(false));
+static cl::opt<bool> ClPoisonStack("msan-poison-stack",
+       cl::desc("poison uninitialized stack variables"),
+       cl::Hidden, cl::init(true));
+static cl::opt<bool> ClPoisonStackWithCall("msan-poison-stack-with-call",
+       cl::desc("poison uninitialized stack variables with a call"),
+       cl::Hidden, cl::init(false));
+static cl::opt<int> ClPoisonStackPattern("msan-poison-stack-pattern",
+       cl::desc("poison uninitialized stack variables with the given patter"),
+       cl::Hidden, cl::init(0xff));
+
+static cl::opt<bool> ClHandleICmp("msan-handle-icmp",
+       cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
+       cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClStoreCleanOrigin("msan-store-clean-origin",
+       cl::desc("store origin for clean (fully initialized) values"),
+       cl::Hidden, cl::init(false));
+
+// This flag controls whether we check the shadow of the address
+// operand of load or store. Such bugs are very rare, since load from
+// a garbage address typically results in SEGV, but still happen
+// (e.g. only lower bits of address are garbage, or the access happens
+// early at program startup where malloc-ed memory is more likely to
+// be zeroed. As of 2012-08-28 this flag adds 20% slowdown.
+static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
+       cl::desc("report accesses through a pointer which has poisoned shadow"),
+       cl::Hidden, cl::init(true));
+
+static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
+       cl::desc("print out instructions with default strict semantics"),
+       cl::Hidden, cl::init(false));
+
+static cl::opt<std::string>  ClBlackListFile("msan-blacklist",
+       cl::desc("File containing the list of functions where MemorySanitizer "
+                "should not report bugs"), cl::Hidden);
+
+namespace {
+
+/// \brief An instrumentation pass implementing detection of uninitialized
+/// reads.
+///
+/// MemorySanitizer: instrument the code in module to find
+/// uninitialized reads.
+class MemorySanitizer : public FunctionPass {
+public:
+  MemorySanitizer() : FunctionPass(ID), TD(0), WarningFn(0) { }
+  const char *getPassName() const { return "MemorySanitizer"; }
+  bool runOnFunction(Function &F);
+  bool doInitialization(Module &M);
+  static char ID;  // Pass identification, replacement for typeid.
+
+private:
+  void initializeCallbacks(Module &M);
+
+  DataLayout *TD;
+  LLVMContext *C;
+  Type *IntptrTy;
+  Type *OriginTy;
+  /// \brief Thread-local shadow storage for function parameters.
+  GlobalVariable *ParamTLS;
+  /// \brief Thread-local origin storage for function parameters.
+  GlobalVariable *ParamOriginTLS;
+  /// \brief Thread-local shadow storage for function return value.
+  GlobalVariable *RetvalTLS;
+  /// \brief Thread-local origin storage for function return value.
+  GlobalVariable *RetvalOriginTLS;
+  /// \brief Thread-local shadow storage for in-register va_arg function
+  /// parameters (x86_64-specific).
+  GlobalVariable *VAArgTLS;
+  /// \brief Thread-local shadow storage for va_arg overflow area
+  /// (x86_64-specific).
+  GlobalVariable *VAArgOverflowSizeTLS;
+  /// \brief Thread-local space used to pass origin value to the UMR reporting
+  /// function.
+  GlobalVariable *OriginTLS;
+
+  /// \brief The run-time callback to print a warning.
+  Value *WarningFn;
+  /// \brief Run-time helper that copies origin info for a memory range.
+  Value *MsanCopyOriginFn;
+  /// \brief Run-time helper that generates a new origin value for a stack
+  /// allocation.
+  Value *MsanSetAllocaOriginFn;
+  /// \brief Run-time helper that poisons stack on function entry.
+  Value *MsanPoisonStackFn;
+  /// \brief MSan runtime replacements for memmove, memcpy and memset.
+  Value *MemmoveFn, *MemcpyFn, *MemsetFn;
+
+  /// \brief Address mask used in application-to-shadow address calculation.
+  /// ShadowAddr is computed as ApplicationAddr & ~ShadowMask.
+  uint64_t ShadowMask;
+  /// \brief Offset of the origin shadow from the "normal" shadow.
+  /// OriginAddr is computed as (ShadowAddr + OriginOffset) & ~3ULL
+  uint64_t OriginOffset;
+  /// \brief Branch weights for error reporting.
+  MDNode *ColdCallWeights;
+  /// \brief Branch weights for origin store.
+  MDNode *OriginStoreWeights;
+  /// \brief The blacklist.
+  OwningPtr<BlackList> BL;
+  /// \brief An empty volatile inline asm that prevents callback merge.
+  InlineAsm *EmptyAsm;
+
+  friend struct MemorySanitizerVisitor;
+  friend struct VarArgAMD64Helper;
+};
+}  // namespace
+
+char MemorySanitizer::ID = 0;
+INITIALIZE_PASS(MemorySanitizer, "msan",
+                "MemorySanitizer: detects uninitialized reads.",
+                false, false)
+
+FunctionPass *llvm::createMemorySanitizerPass() {
+  return new MemorySanitizer();
+}
+
+/// \brief Create a non-const global initialized with the given string.
+///
+/// Creates a writable global for Str so that we can pass it to the
+/// run-time lib. Runtime uses first 4 bytes of the string to store the
+/// frame ID, so the string needs to be mutable.
+static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
+                                                            StringRef Str) {
+  Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
+  return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
+                            GlobalValue::PrivateLinkage, StrConst, "");
+}
+
+
+/// \brief Insert extern declaration of runtime-provided functions and globals.
+void MemorySanitizer::initializeCallbacks(Module &M) {
+  // Only do this once.
+  if (WarningFn)
+    return;
+
+  IRBuilder<> IRB(*C);
+  // Create the callback.
+  // FIXME: this function should have "Cold" calling conv,
+  // which is not yet implemented.
+  StringRef WarningFnName = ClKeepGoing ? "__msan_warning"
+                                        : "__msan_warning_noreturn";
+  WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL);
+
+  MsanCopyOriginFn = M.getOrInsertFunction(
+    "__msan_copy_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(),
+    IRB.getInt8PtrTy(), IntptrTy, NULL);
+  MsanSetAllocaOriginFn = M.getOrInsertFunction(
+    "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
+    IRB.getInt8PtrTy(), NULL);
+  MsanPoisonStackFn = M.getOrInsertFunction(
+    "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, NULL);
+  MemmoveFn = M.getOrInsertFunction(
+    "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+    IntptrTy, NULL);
+  MemcpyFn = M.getOrInsertFunction(
+    "__msan_memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+    IntptrTy, NULL);
+  MemsetFn = M.getOrInsertFunction(
+    "__msan_memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(),
+    IntptrTy, NULL);
+
+  // Create globals.
+  RetvalTLS = new GlobalVariable(
+    M, ArrayType::get(IRB.getInt64Ty(), 8), false,
+    GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0,
+    GlobalVariable::GeneralDynamicTLSModel);
+  RetvalOriginTLS = new GlobalVariable(
+    M, OriginTy, false, GlobalVariable::ExternalLinkage, 0,
+    "__msan_retval_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+
+  ParamTLS = new GlobalVariable(
+    M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
+    GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0,
+    GlobalVariable::GeneralDynamicTLSModel);
+  ParamOriginTLS = new GlobalVariable(
+    M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage,
+    0, "__msan_param_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+
+  VAArgTLS = new GlobalVariable(
+    M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
+    GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0,
+    GlobalVariable::GeneralDynamicTLSModel);
+  VAArgOverflowSizeTLS = new GlobalVariable(
+    M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0,
+    "__msan_va_arg_overflow_size_tls", 0,
+    GlobalVariable::GeneralDynamicTLSModel);
+  OriginTLS = new GlobalVariable(
+    M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0,
+    "__msan_origin_tls", 0, GlobalVariable::GeneralDynamicTLSModel);
+
+  // We insert an empty inline asm after __msan_report* to avoid callback merge.
+  EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
+                            StringRef(""), StringRef(""),
+                            /*hasSideEffects=*/true);
+}
+
+/// \brief Module-level initialization.
+///
+/// inserts a call to __msan_init to the module's constructor list.
+bool MemorySanitizer::doInitialization(Module &M) {
+  TD = getAnalysisIfAvailable<DataLayout>();
+  if (!TD)
+    return false;
+  BL.reset(new BlackList(ClBlackListFile));
+  C = &(M.getContext());
+  unsigned PtrSize = TD->getPointerSizeInBits(/* AddressSpace */0);
+  switch (PtrSize) {
+    case 64:
+      ShadowMask = kShadowMask64;
+      OriginOffset = kOriginOffset64;
+      break;
+    case 32:
+      ShadowMask = kShadowMask32;
+      OriginOffset = kOriginOffset32;
+      break;
+    default:
+      report_fatal_error("unsupported pointer size");
+      break;
+  }
+
+  IRBuilder<> IRB(*C);
+  IntptrTy = IRB.getIntPtrTy(TD);
+  OriginTy = IRB.getInt32Ty();
+
+  ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
+  OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
+
+  // Insert a call to __msan_init/__msan_track_origins into the module's CTORs.
+  appendToGlobalCtors(M, cast<Function>(M.getOrInsertFunction(
+                      "__msan_init", IRB.getVoidTy(), NULL)), 0);
+
+  new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
+                     IRB.getInt32(ClTrackOrigins), "__msan_track_origins");
+
+  return true;
+}
+
+namespace {
+
+/// \brief A helper class that handles instrumentation of VarArg
+/// functions on a particular platform.
+///
+/// Implementations are expected to insert the instrumentation
+/// necessary to propagate argument shadow through VarArg function
+/// calls. Visit* methods are called during an InstVisitor pass over
+/// the function, and should avoid creating new basic blocks. A new
+/// instance of this class is created for each instrumented function.
+struct VarArgHelper {
+  /// \brief Visit a CallSite.
+  virtual void visitCallSite(CallSite &CS, IRBuilder<> &IRB) = 0;
+
+  /// \brief Visit a va_start call.
+  virtual void visitVAStartInst(VAStartInst &I) = 0;
+
+  /// \brief Visit a va_copy call.
+  virtual void visitVACopyInst(VACopyInst &I) = 0;
+
+  /// \brief Finalize function instrumentation.
+  ///
+  /// This method is called after visiting all interesting (see above)
+  /// instructions in a function.
+  virtual void finalizeInstrumentation() = 0;
+
+  virtual ~VarArgHelper() {}
+};
+
+struct MemorySanitizerVisitor;
+
+VarArgHelper*
+CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+                   MemorySanitizerVisitor &Visitor);
+
+/// This class does all the work for a given function. Store and Load
+/// instructions store and load corresponding shadow and origin
+/// values. Most instructions propagate shadow from arguments to their
+/// return values. Certain instructions (most importantly, BranchInst)
+/// test their argument shadow and print reports (with a runtime call) if it's
+/// non-zero.
+struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
+  Function &F;
+  MemorySanitizer &MS;
+  SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
+  ValueMap<Value*, Value*> ShadowMap, OriginMap;
+  bool InsertChecks;
+  OwningPtr<VarArgHelper> VAHelper;
+
+  // An unfortunate workaround for asymmetric lowering of va_arg stuff.
+  // See a comment in visitCallSite for more details.
+  static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
+  static const unsigned AMD64FpEndOffset = 176;
+
+  struct ShadowOriginAndInsertPoint {
+    Instruction *Shadow;
+    Instruction *Origin;
+    Instruction *OrigIns;
+    ShadowOriginAndInsertPoint(Instruction *S, Instruction *O, Instruction *I)
+      : Shadow(S), Origin(O), OrigIns(I) { }
+    ShadowOriginAndInsertPoint() : Shadow(0), Origin(0), OrigIns(0) { }
+  };
+  SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
+  SmallVector<Instruction*, 16> StoreList;
+
+  MemorySanitizerVisitor(Function &F, MemorySanitizer &MS)
+    : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) {
+    InsertChecks = !MS.BL->isIn(F);
+    DEBUG(if (!InsertChecks)
+            dbgs() << "MemorySanitizer is not inserting checks into '"
+                   << F.getName() << "'\n");
+  }
+
+  void materializeStores() {
+    for (size_t i = 0, n = StoreList.size(); i < n; i++) {
+      StoreInst& I = *dyn_cast<StoreInst>(StoreList[i]);
+
+      IRBuilder<> IRB(&I);
+      Value *Val = I.getValueOperand();
+      Value *Addr = I.getPointerOperand();
+      Value *Shadow = getShadow(Val);
+      Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
+
+      StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
+      DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
+      (void)NewSI;
+      // If the store is volatile, add a check.
+      if (I.isVolatile())
+        insertCheck(Val, &I);
+      if (ClCheckAccessAddress)
+        insertCheck(Addr, &I);
+
+      if (ClTrackOrigins) {
+        if (ClStoreCleanOrigin || isa<StructType>(Shadow->getType())) {
+          IRB.CreateAlignedStore(getOrigin(Val), getOriginPtr(Addr, IRB), I.getAlignment());
+        } else {
+          Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+
+          Constant *Cst = dyn_cast_or_null<Constant>(ConvertedShadow);
+          // TODO(eugenis): handle non-zero constant shadow by inserting an
+          // unconditional check (can not simply fail compilation as this could
+          // be in the dead code).
+          if (Cst)
+            continue;
+
+          Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
+              getCleanShadow(ConvertedShadow), "_mscmp");
+          Instruction *CheckTerm =
+            SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false, MS.OriginStoreWeights);
+          IRBuilder<> IRBNewBlock(CheckTerm);
+          IRBNewBlock.CreateAlignedStore(getOrigin(Val),
+              getOriginPtr(Addr, IRBNewBlock), I.getAlignment());
+        }
+      }
+    }
+  }
+
+  void materializeChecks() {
+    for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
+      Instruction *Shadow = InstrumentationList[i].Shadow;
+      Instruction *OrigIns = InstrumentationList[i].OrigIns;
+      IRBuilder<> IRB(OrigIns);
+      DEBUG(dbgs() << "  SHAD0 : " << *Shadow << "\n");
+      Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+      DEBUG(dbgs() << "  SHAD1 : " << *ConvertedShadow << "\n");
+      Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
+                                    getCleanShadow(ConvertedShadow), "_mscmp");
+      Instruction *CheckTerm =
+        SplitBlockAndInsertIfThen(cast<Instruction>(Cmp),
+                                  /* Unreachable */ !ClKeepGoing,
+                                  MS.ColdCallWeights);
+
+      IRB.SetInsertPoint(CheckTerm);
+      if (ClTrackOrigins) {
+        Instruction *Origin = InstrumentationList[i].Origin;
+        IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0),
+                        MS.OriginTLS);
+      }
+      CallInst *Call = IRB.CreateCall(MS.WarningFn);
+      Call->setDebugLoc(OrigIns->getDebugLoc());
+      IRB.CreateCall(MS.EmptyAsm);
+      DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
+    }
+    DEBUG(dbgs() << "DONE:\n" << F);
+  }
+
+  /// \brief Add MemorySanitizer instrumentation to a function.
+  bool runOnFunction() {
+    MS.initializeCallbacks(*F.getParent());
+    if (!MS.TD) return false;
+    // Iterate all BBs in depth-first order and create shadow instructions
+    // for all instructions (where applicable).
+    // For PHI nodes we create dummy shadow PHIs which will be finalized later.
+    for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
+         DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
+      BasicBlock *BB = *DI;
+      visit(*BB);
+    }
+
+    // Finalize PHI nodes.
+    for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) {
+      PHINode *PN = ShadowPHINodes[i];
+      PHINode *PNS = cast<PHINode>(getShadow(PN));
+      PHINode *PNO = ClTrackOrigins ? cast<PHINode>(getOrigin(PN)) : 0;
+      size_t NumValues = PN->getNumIncomingValues();
+      for (size_t v = 0; v < NumValues; v++) {
+        PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
+        if (PNO)
+          PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
+      }
+    }
+
+    VAHelper->finalizeInstrumentation();
+
+    // Delayed instrumentation of StoreInst.
+    // This may add new checks to be inserted later.
+    materializeStores();
+
+    // Insert shadow value checks.
+    materializeChecks();
+
+    return true;
+  }
+
+  /// \brief Compute the shadow type that corresponds to a given Value.
+  Type *getShadowTy(Value *V) {
+    return getShadowTy(V->getType());
+  }
+
+  /// \brief Compute the shadow type that corresponds to a given Type.
+  Type *getShadowTy(Type *OrigTy) {
+    if (!OrigTy->isSized()) {
+      return 0;
+    }
+    // For integer type, shadow is the same as the original type.
+    // This may return weird-sized types like i1.
+    if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
+      return IT;
+    if (VectorType *VT = dyn_cast<VectorType>(OrigTy))
+      return VectorType::getInteger(VT);
+    if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
+      SmallVector<Type*, 4> Elements;
+      for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+        Elements.push_back(getShadowTy(ST->getElementType(i)));
+      StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
+      DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
+      return Res;
+    }
+    uint32_t TypeSize = MS.TD->getTypeStoreSizeInBits(OrigTy);
+    return IntegerType::get(*MS.C, TypeSize);
+  }
+
+  /// \brief Flatten a vector type.
+  Type *getShadowTyNoVec(Type *ty) {
+    if (VectorType *vt = dyn_cast<VectorType>(ty))
+      return IntegerType::get(*MS.C, vt->getBitWidth());
+    return ty;
+  }
+
+  /// \brief Convert a shadow value to it's flattened variant.
+  Value *convertToShadowTyNoVec(Value *V, IRBuilder<> &IRB) {
+    Type *Ty = V->getType();
+    Type *NoVecTy = getShadowTyNoVec(Ty);
+    if (Ty == NoVecTy) return V;
+    return IRB.CreateBitCast(V, NoVecTy);
+  }
+
+  /// \brief Compute the shadow address that corresponds to a given application
+  /// address.
+  ///
+  /// Shadow = Addr & ~ShadowMask.
+  Value *getShadowPtr(Value *Addr, Type *ShadowTy,
+                      IRBuilder<> &IRB) {
+    Value *ShadowLong =
+      IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
+                    ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask));
+    return IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
+  }
+
+  /// \brief Compute the origin address that corresponds to a given application
+  /// address.
+  ///
+  /// OriginAddr = (ShadowAddr + OriginOffset) & ~3ULL
+  Value *getOriginPtr(Value *Addr, IRBuilder<> &IRB) {
+    Value *ShadowLong =
+      IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy),
+                    ConstantInt::get(MS.IntptrTy, ~MS.ShadowMask));
+    Value *Add =
+      IRB.CreateAdd(ShadowLong,
+                    ConstantInt::get(MS.IntptrTy, MS.OriginOffset));
+    Value *SecondAnd =
+      IRB.CreateAnd(Add, ConstantInt::get(MS.IntptrTy, ~3ULL));
+    return IRB.CreateIntToPtr(SecondAnd, PointerType::get(IRB.getInt32Ty(), 0));
+  }
+
+  /// \brief Compute the shadow address for a given function argument.
+  ///
+  /// Shadow = ParamTLS+ArgOffset.
+  Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB,
+                                 int ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
+                              "_msarg");
+  }
+
+  /// \brief Compute the origin address for a given function argument.
+  Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
+                                 int ArgOffset) {
+    if (!ClTrackOrigins) return 0;
+    Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
+                              "_msarg_o");
+  }
+
+  /// \brief Compute the shadow address for a retval.
+  Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
+    Value *Base = IRB.CreatePointerCast(MS.RetvalTLS, MS.IntptrTy);
+    return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
+                              "_msret");
+  }
+
+  /// \brief Compute the origin address for a retval.
+  Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
+    // We keep a single origin for the entire retval. Might be too optimistic.
+    return MS.RetvalOriginTLS;
+  }
+
+  /// \brief Set SV to be the shadow value for V.
+  void setShadow(Value *V, Value *SV) {
+    assert(!ShadowMap.count(V) && "Values may only have one shadow");
+    ShadowMap[V] = SV;
+  }
+
+  /// \brief Set Origin to be the origin value for V.
+  void setOrigin(Value *V, Value *Origin) {
+    if (!ClTrackOrigins) return;
+    assert(!OriginMap.count(V) && "Values may only have one origin");
+    DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
+    OriginMap[V] = Origin;
+  }
+
+  /// \brief Create a clean shadow value for a given value.
+  ///
+  /// Clean shadow (all zeroes) means all bits of the value are defined
+  /// (initialized).
+  Value *getCleanShadow(Value *V) {
+    Type *ShadowTy = getShadowTy(V);
+    if (!ShadowTy)
+      return 0;
+    return Constant::getNullValue(ShadowTy);
+  }
+
+  /// \brief Create a dirty shadow of a given shadow type.
+  Constant *getPoisonedShadow(Type *ShadowTy) {
+    assert(ShadowTy);
+    if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
+      return Constant::getAllOnesValue(ShadowTy);
+    StructType *ST = cast<StructType>(ShadowTy);
+    SmallVector<Constant *, 4> Vals;
+    for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
+      Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
+    return ConstantStruct::get(ST, Vals);
+  }
+
+  /// \brief Create a clean (zero) origin.
+  Value *getCleanOrigin() {
+    return Constant::getNullValue(MS.OriginTy);
+  }
+
+  /// \brief Get the shadow value for a given Value.
+  ///
+  /// This function either returns the value set earlier with setShadow,
+  /// or extracts if from ParamTLS (for function arguments).
+  Value *getShadow(Value *V) {
+    if (Instruction *I = dyn_cast<Instruction>(V)) {
+      // For instructions the shadow is already stored in the map.
+      Value *Shadow = ShadowMap[V];
+      if (!Shadow) {
+        DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
+        (void)I;
+        assert(Shadow && "No shadow for a value");
+      }
+      return Shadow;
+    }
+    if (UndefValue *U = dyn_cast<UndefValue>(V)) {
+      Value *AllOnes = getPoisonedShadow(getShadowTy(V));
+      DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
+      (void)U;
+      return AllOnes;
+    }
+    if (Argument *A = dyn_cast<Argument>(V)) {
+      // For arguments we compute the shadow on demand and store it in the map.
+      Value **ShadowPtr = &ShadowMap[V];
+      if (*ShadowPtr)
+        return *ShadowPtr;
+      Function *F = A->getParent();
+      IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI());
+      unsigned ArgOffset = 0;
+      for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+           AI != AE; ++AI) {
+        if (!AI->getType()->isSized()) {
+          DEBUG(dbgs() << "Arg is not sized\n");
+          continue;
+        }
+        unsigned Size = AI->hasByValAttr()
+          ? MS.TD->getTypeAllocSize(AI->getType()->getPointerElementType())
+          : MS.TD->getTypeAllocSize(AI->getType());
+        if (A == AI) {
+          Value *Base = getShadowPtrForArgument(AI, EntryIRB, ArgOffset);
+          if (AI->hasByValAttr()) {
+            // ByVal pointer itself has clean shadow. We copy the actual
+            // argument shadow to the underlying memory.
+            Value *Cpy = EntryIRB.CreateMemCpy(
+              getShadowPtr(V, EntryIRB.getInt8Ty(), EntryIRB),
+              Base, Size, AI->getParamAlignment());
+            DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
+            (void)Cpy;
+            *ShadowPtr = getCleanShadow(V);
+          } else {
+            *ShadowPtr = EntryIRB.CreateLoad(Base);
+          }
+          DEBUG(dbgs() << "  ARG:    "  << *AI << " ==> " <<
+                **ShadowPtr << "\n");
+          if (ClTrackOrigins) {
+            Value* OriginPtr = getOriginPtrForArgument(AI, EntryIRB, ArgOffset);
+            setOrigin(A, EntryIRB.CreateLoad(OriginPtr));
+          }
+        }
+        ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
+      }
+      assert(*ShadowPtr && "Could not find shadow for an argument");
+      return *ShadowPtr;
+    }
+    // For everything else the shadow is zero.
+    return getCleanShadow(V);
+  }
+
+  /// \brief Get the shadow for i-th argument of the instruction I.
+  Value *getShadow(Instruction *I, int i) {
+    return getShadow(I->getOperand(i));
+  }
+
+  /// \brief Get the origin for a value.
+  Value *getOrigin(Value *V) {
+    if (!ClTrackOrigins) return 0;
+    if (isa<Instruction>(V) || isa<Argument>(V)) {
+      Value *Origin = OriginMap[V];
+      if (!Origin) {
+        DEBUG(dbgs() << "NO ORIGIN: " << *V << "\n");
+        Origin = getCleanOrigin();
+      }
+      return Origin;
+    }
+    return getCleanOrigin();
+  }
+
+  /// \brief Get the origin for i-th argument of the instruction I.
+  Value *getOrigin(Instruction *I, int i) {
+    return getOrigin(I->getOperand(i));
+  }
+
+  /// \brief Remember the place where a shadow check should be inserted.
+  ///
+  /// This location will be later instrumented with a check that will print a
+  /// UMR warning in runtime if the value is not fully defined.
+  void insertCheck(Value *Val, Instruction *OrigIns) {
+    assert(Val);
+    if (!InsertChecks) return;
+    Instruction *Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
+    if (!Shadow) return;
+#ifndef NDEBUG
+    Type *ShadowTy = Shadow->getType();
+    assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy)) &&
+           "Can only insert checks for integer and vector shadow types");
+#endif
+    Instruction *Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
+    InstrumentationList.push_back(
+      ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
+  }
+
+  //------------------- Visitors.
+
+  /// \brief Instrument LoadInst
+  ///
+  /// Loads the corresponding shadow and (optionally) origin.
+  /// Optionally, checks that the load address is fully defined.
+  void visitLoadInst(LoadInst &I) {
+    assert(I.getType()->isSized() && "Load type must have size");
+    IRBuilder<> IRB(&I);
+    Type *ShadowTy = getShadowTy(&I);
+    Value *Addr = I.getPointerOperand();
+    Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB);
+    setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld"));
+
+    if (ClCheckAccessAddress)
+      insertCheck(I.getPointerOperand(), &I);
+
+    if (ClTrackOrigins)
+      setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), I.getAlignment()));
+  }
+
+  /// \brief Instrument StoreInst
+  ///
+  /// Stores the corresponding shadow and (optionally) origin.
+  /// Optionally, checks that the store address is fully defined.
+  /// Volatile stores check that the value being stored is fully defined.
+  void visitStoreInst(StoreInst &I) {
+    StoreList.push_back(&I);
+  }
+
+  // Vector manipulation.
+  void visitExtractElementInst(ExtractElementInst &I) {
+    insertCheck(I.getOperand(1), &I);
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
+              "_msprop"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitInsertElementInst(InsertElementInst &I) {
+    insertCheck(I.getOperand(2), &I);
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateInsertElement(getShadow(&I, 0), getShadow(&I, 1),
+              I.getOperand(2), "_msprop"));
+    setOriginForNaryOp(I);
+  }
+
+  void visitShuffleVectorInst(ShuffleVectorInst &I) {
+    insertCheck(I.getOperand(2), &I);
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateShuffleVector(getShadow(&I, 0), getShadow(&I, 1),
+              I.getOperand(2), "_msprop"));
+    setOriginForNaryOp(I);
+  }
+
+  // Casts.
+  void visitSExtInst(SExtInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitZExtInst(ZExtInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitTruncInst(TruncInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitBitCastInst(BitCastInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitPtrToIntInst(PtrToIntInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
+             "_msprop_ptrtoint"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitIntToPtrInst(IntToPtrInst &I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
+             "_msprop_inttoptr"));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitFPToSIInst(CastInst& I) { handleShadowOr(I); }
+  void visitFPToUIInst(CastInst& I) { handleShadowOr(I); }
+  void visitSIToFPInst(CastInst& I) { handleShadowOr(I); }
+  void visitUIToFPInst(CastInst& I) { handleShadowOr(I); }
+  void visitFPExtInst(CastInst& I) { handleShadowOr(I); }
+  void visitFPTruncInst(CastInst& I) { handleShadowOr(I); }
+
+  /// \brief Propagate shadow for bitwise AND.
+  ///
+  /// This code is exact, i.e. if, for example, a bit in the left argument
+  /// is defined and 0, then neither the value not definedness of the
+  /// corresponding bit in B don't affect the resulting shadow.
+  void visitAnd(BinaryOperator &I) {
+    IRBuilder<> IRB(&I);
+    //  "And" of 0 and a poisoned value results in unpoisoned value.
+    //  1&1 => 1;     0&1 => 0;     p&1 => p;
+    //  1&0 => 0;     0&0 => 0;     p&0 => 0;
+    //  1&p => p;     0&p => 0;     p&p => p;
+    //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
+    Value *S1 = getShadow(&I, 0);
+    Value *S2 = getShadow(&I, 1);
+    Value *V1 = I.getOperand(0);
+    Value *V2 = I.getOperand(1);
+    if (V1->getType() != S1->getType()) {
+      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
+      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
+    }
+    Value *S1S2 = IRB.CreateAnd(S1, S2);
+    Value *V1S2 = IRB.CreateAnd(V1, S2);
+    Value *S1V2 = IRB.CreateAnd(S1, V2);
+    setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+    setOriginForNaryOp(I);
+  }
+
+  void visitOr(BinaryOperator &I) {
+    IRBuilder<> IRB(&I);
+    //  "Or" of 1 and a poisoned value results in unpoisoned value.
+    //  1|1 => 1;     0|1 => 1;     p|1 => 1;
+    //  1|0 => 1;     0|0 => 0;     p|0 => p;
+    //  1|p => 1;     0|p => p;     p|p => p;
+    //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
+    Value *S1 = getShadow(&I, 0);
+    Value *S2 = getShadow(&I, 1);
+    Value *V1 = IRB.CreateNot(I.getOperand(0));
+    Value *V2 = IRB.CreateNot(I.getOperand(1));
+    if (V1->getType() != S1->getType()) {
+      V1 = IRB.CreateIntCast(V1, S1->getType(), false);
+      V2 = IRB.CreateIntCast(V2, S2->getType(), false);
+    }
+    Value *S1S2 = IRB.CreateAnd(S1, S2);
+    Value *V1S2 = IRB.CreateAnd(V1, S2);
+    Value *S1V2 = IRB.CreateAnd(S1, V2);
+    setShadow(&I, IRB.CreateOr(S1S2, IRB.CreateOr(V1S2, S1V2)));
+    setOriginForNaryOp(I);
+  }
+
+  /// \brief Propagate origin for an instruction.
+  ///
+  /// This is a general case of origin propagation. For an Nary operation,
+  /// is set to the origin of an argument that is not entirely initialized.
+  /// If there is more than one such arguments, the rightmost of them is picked.
+  /// It does not matter which one is picked if all arguments are initialized.
+  void setOriginForNaryOp(Instruction &I) {
+    if (!ClTrackOrigins) return;
+    IRBuilder<> IRB(&I);
+    Value *Origin = getOrigin(&I, 0);
+    for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op) {
+      Value *S = convertToShadowTyNoVec(getShadow(&I, Op), IRB);
+      Origin = IRB.CreateSelect(IRB.CreateICmpNE(S, getCleanShadow(S)),
+                                getOrigin(&I, Op), Origin);
+    }
+    setOrigin(&I, Origin);
+  }
+
+  /// \brief Propagate shadow for a binary operation.
+  ///
+  /// Shadow = Shadow0 | Shadow1, all 3 must have the same type.
+  /// Bitwise OR is selected as an operation that will never lose even a bit of
+  /// poison.
+  void handleShadowOrBinary(Instruction &I) {
+    IRBuilder<> IRB(&I);
+    Value *Shadow0 = getShadow(&I, 0);
+    Value *Shadow1 = getShadow(&I, 1);
+    setShadow(&I, IRB.CreateOr(Shadow0, Shadow1, "_msprop"));
+    setOriginForNaryOp(I);
+  }
+
+  /// \brief Propagate shadow for arbitrary operation.
+  ///
+  /// This is a general case of shadow propagation, used in all cases where we
+  /// don't know and/or care about what the operation actually does.
+  /// It converts all input shadow values to a common type (extending or
+  /// truncating as necessary), and bitwise OR's them.
+  ///
+  /// This is much cheaper than inserting checks (i.e. requiring inputs to be
+  /// fully initialized), and less prone to false positives.
+  // FIXME: is the casting actually correct?
+  // FIXME: merge this with handleShadowOrBinary.
+  void handleShadowOr(Instruction &I) {
+    IRBuilder<> IRB(&I);
+    Value *Shadow = getShadow(&I, 0);
+    for (unsigned Op = 1, n = I.getNumOperands(); Op < n; ++Op)
+      Shadow = IRB.CreateOr(
+        Shadow, IRB.CreateIntCast(getShadow(&I, Op), Shadow->getType(), false),
+        "_msprop");
+    Shadow = IRB.CreateIntCast(Shadow, getShadowTy(&I), false);
+    setShadow(&I, Shadow);
+    setOriginForNaryOp(I);
+  }
+
+  void visitFAdd(BinaryOperator &I) { handleShadowOrBinary(I); }
+  void visitFSub(BinaryOperator &I) { handleShadowOrBinary(I); }
+  void visitFMul(BinaryOperator &I) { handleShadowOrBinary(I); }
+  void visitAdd(BinaryOperator &I) { handleShadowOrBinary(I); }
+  void visitSub(BinaryOperator &I) { handleShadowOrBinary(I); }
+  void visitXor(BinaryOperator &I) { handleShadowOrBinary(I); }
+  void visitMul(BinaryOperator &I) { handleShadowOrBinary(I); }
+
+  void handleDiv(Instruction &I) {
+    IRBuilder<> IRB(&I);
+    // Strict on the second argument.
+    insertCheck(I.getOperand(1), &I);
+    setShadow(&I, getShadow(&I, 0));
+    setOrigin(&I, getOrigin(&I, 0));
+  }
+
+  void visitUDiv(BinaryOperator &I) { handleDiv(I); }
+  void visitSDiv(BinaryOperator &I) { handleDiv(I); }
+  void visitFDiv(BinaryOperator &I) { handleDiv(I); }
+  void visitURem(BinaryOperator &I) { handleDiv(I); }
+  void visitSRem(BinaryOperator &I) { handleDiv(I); }
+  void visitFRem(BinaryOperator &I) { handleDiv(I); }
+
+  /// \brief Instrument == and != comparisons.
+  ///
+  /// Sometimes the comparison result is known even if some of the bits of the
+  /// arguments are not.
+  void handleEqualityComparison(ICmpInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *A = I.getOperand(0);
+    Value *B = I.getOperand(1);
+    Value *Sa = getShadow(A);
+    Value *Sb = getShadow(B);
+    if (A->getType()->isPointerTy())
+      A = IRB.CreatePointerCast(A, MS.IntptrTy);
+    if (B->getType()->isPointerTy())
+      B = IRB.CreatePointerCast(B, MS.IntptrTy);
+    // A == B  <==>  (C = A^B) == 0
+    // A != B  <==>  (C = A^B) != 0
+    // Sc = Sa | Sb
+    Value *C = IRB.CreateXor(A, B);
+    Value *Sc = IRB.CreateOr(Sa, Sb);
+    // Now dealing with i = (C == 0) comparison (or C != 0, does not matter now)
+    // Result is defined if one of the following is true
+    // * there is a defined 1 bit in C
+    // * C is fully defined
+    // Si = !(C & ~Sc) && Sc
+    Value *Zero = Constant::getNullValue(Sc->getType());
+    Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
+    Value *Si =
+      IRB.CreateAnd(IRB.CreateICmpNE(Sc, Zero),
+                    IRB.CreateICmpEQ(
+                      IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero));
+    Si->setName("_msprop_icmp");
+    setShadow(&I, Si);
+    setOriginForNaryOp(I);
+  }
+
+  /// \brief Instrument signed relational comparisons.
+  ///
+  /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by
+  /// propagating the highest bit of the shadow. Everything else is delegated
+  /// to handleShadowOr().
+  void handleSignedRelationalComparison(ICmpInst &I) {
+    Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
+    Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
+    Value* op = NULL;
+    CmpInst::Predicate pre = I.getPredicate();
+    if (constOp0 && constOp0->isNullValue() &&
+        (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) {
+      op = I.getOperand(1);
+    } else if (constOp1 && constOp1->isNullValue() &&
+               (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) {
+      op = I.getOperand(0);
+    }
+    if (op) {
+      IRBuilder<> IRB(&I);
+      Value* Shadow =
+        IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), "_msprop_icmpslt");
+      setShadow(&I, Shadow);
+      setOrigin(&I, getOrigin(op));
+    } else {
+      handleShadowOr(I);
+    }
+  }
+
+  void visitICmpInst(ICmpInst &I) {
+    if (ClHandleICmp && I.isEquality())
+      handleEqualityComparison(I);
+    else if (ClHandleICmp && I.isSigned() && I.isRelational())
+      handleSignedRelationalComparison(I);
+    else
+      handleShadowOr(I);
+  }
+
+  void visitFCmpInst(FCmpInst &I) {
+    handleShadowOr(I);
+  }
+
+  void handleShift(BinaryOperator &I) {
+    IRBuilder<> IRB(&I);
+    // If any of the S2 bits are poisoned, the whole thing is poisoned.
+    // Otherwise perform the same shift on S1.
+    Value *S1 = getShadow(&I, 0);
+    Value *S2 = getShadow(&I, 1);
+    Value *S2Conv = IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)),
+                                   S2->getType());
+    Value *V2 = I.getOperand(1);
+    Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
+    setShadow(&I, IRB.CreateOr(Shift, S2Conv));
+    setOriginForNaryOp(I);
+  }
+
+  void visitShl(BinaryOperator &I) { handleShift(I); }
+  void visitAShr(BinaryOperator &I) { handleShift(I); }
+  void visitLShr(BinaryOperator &I) { handleShift(I); }
+
+  /// \brief Instrument llvm.memmove
+  ///
+  /// At this point we don't know if llvm.memmove will be inlined or not.
+  /// If we don't instrument it and it gets inlined,
+  /// our interceptor will not kick in and we will lose the memmove.
+  /// If we instrument the call here, but it does not get inlined,
+  /// we will memove the shadow twice: which is bad in case
+  /// of overlapping regions. So, we simply lower the intrinsic to a call.
+  ///
+  /// Similar situation exists for memcpy and memset.
+  void visitMemMoveInst(MemMoveInst &I) {
+    IRBuilder<> IRB(&I);
+    IRB.CreateCall3(
+      MS.MemmoveFn,
+      IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+      IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+      IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+    I.eraseFromParent();
+  }
+
+  // Similar to memmove: avoid copying shadow twice.
+  // This is somewhat unfortunate as it may slowdown small constant memcpys.
+  // FIXME: consider doing manual inline for small constant sizes and proper
+  // alignment.
+  void visitMemCpyInst(MemCpyInst &I) {
+    IRBuilder<> IRB(&I);
+    IRB.CreateCall3(
+      MS.MemcpyFn,
+      IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+      IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
+      IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+    I.eraseFromParent();
+  }
+
+  // Same as memcpy.
+  void visitMemSetInst(MemSetInst &I) {
+    IRBuilder<> IRB(&I);
+    IRB.CreateCall3(
+      MS.MemsetFn,
+      IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
+      IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
+      IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false));
+    I.eraseFromParent();
+  }
+
+  void visitVAStartInst(VAStartInst &I) {
+    VAHelper->visitVAStartInst(I);
+  }
+
+  void visitVACopyInst(VACopyInst &I) {
+    VAHelper->visitVACopyInst(I);
+  }
+
+  void handleBswap(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Op = I.getArgOperand(0);
+    Type *OpType = Op->getType();
+    Function *BswapFunc = Intrinsic::getDeclaration(
+      F.getParent(), Intrinsic::bswap, ArrayRef<Type*>(&OpType, 1));
+    setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
+    setOrigin(&I, getOrigin(Op));
+  }
+
+  void visitIntrinsicInst(IntrinsicInst &I) {
+    switch (I.getIntrinsicID()) {
+    case llvm::Intrinsic::bswap:
+      handleBswap(I); break;
+    default:
+      visitInstruction(I); break;
+    }
+  }
+
+  void visitCallSite(CallSite CS) {
+    Instruction &I = *CS.getInstruction();
+    assert((CS.isCall() || CS.isInvoke()) && "Unknown type of CallSite");
+    if (CS.isCall()) {
+      CallInst *Call = cast<CallInst>(&I);
+
+      // For inline asm, do the usual thing: check argument shadow and mark all
+      // outputs as clean. Note that any side effects of the inline asm that are
+      // not immediately visible in its constraints are not handled.
+      if (Call->isInlineAsm()) {
+        visitInstruction(I);
+        return;
+      }
+
+      // Allow only tail calls with the same types, otherwise
+      // we may have a false positive: shadow for a non-void RetVal
+      // will get propagated to a void RetVal.
+      if (Call->isTailCall() && Call->getType() != Call->getParent()->getType())
+        Call->setTailCall(false);
+
+      assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere");
+
+      // We are going to insert code that relies on the fact that the callee
+      // will become a non-readonly function after it is instrumented by us. To
+      // prevent this code from being optimized out, mark that function
+      // non-readonly in advance.
+      if (Function *Func = Call->getCalledFunction()) {
+        // Clear out readonly/readnone attributes.
+        AttrBuilder B;
+        B.addAttribute(Attributes::ReadOnly)
+          .addAttribute(Attributes::ReadNone);
+        Func->removeAttribute(AttributeSet::FunctionIndex,
+                              Attributes::get(Func->getContext(), B));
+      }
+    }
+    IRBuilder<> IRB(&I);
+    unsigned ArgOffset = 0;
+    DEBUG(dbgs() << "  CallSite: " << I << "\n");
+    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+         ArgIt != End; ++ArgIt) {
+      Value *A = *ArgIt;
+      unsigned i = ArgIt - CS.arg_begin();
+      if (!A->getType()->isSized()) {
+        DEBUG(dbgs() << "Arg " << i << " is not sized: " << I << "\n");
+        continue;
+      }
+      unsigned Size = 0;
+      Value *Store = 0;
+      // Compute the Shadow for arg even if it is ByVal, because
+      // in that case getShadow() will copy the actual arg shadow to
+      // __msan_param_tls.
+      Value *ArgShadow = getShadow(A);
+      Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
+      DEBUG(dbgs() << "  Arg#" << i << ": " << *A <<
+            " Shadow: " << *ArgShadow << "\n");
+      if (CS.paramHasAttr(i + 1, Attributes::ByVal)) {
+        assert(A->getType()->isPointerTy() &&
+               "ByVal argument is not a pointer!");
+        Size = MS.TD->getTypeAllocSize(A->getType()->getPointerElementType());
+        unsigned Alignment = CS.getParamAlignment(i + 1);
+        Store = IRB.CreateMemCpy(ArgShadowBase,
+                                 getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
+                                 Size, Alignment);
+      } else {
+        Size = MS.TD->getTypeAllocSize(A->getType());
+        Store = IRB.CreateStore(ArgShadow, ArgShadowBase);
+      }
+      if (ClTrackOrigins)
+        IRB.CreateStore(getOrigin(A),
+                        getOriginPtrForArgument(A, IRB, ArgOffset));
+      assert(Size != 0 && Store != 0);
+      DEBUG(dbgs() << "  Param:" << *Store << "\n");
+      ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
+    }
+    DEBUG(dbgs() << "  done with call args\n");
+
+    FunctionType *FT =
+      cast<FunctionType>(CS.getCalledValue()->getType()-> getContainedType(0));
+    if (FT->isVarArg()) {
+      VAHelper->visitCallSite(CS, IRB);
+    }
+
+    // Now, get the shadow for the RetVal.
+    if (!I.getType()->isSized()) return;
+    IRBuilder<> IRBBefore(&I);
+    // Untill we have full dynamic coverage, make sure the retval shadow is 0.
+    Value *Base = getShadowPtrForRetval(&I, IRBBefore);
+    IRBBefore.CreateStore(getCleanShadow(&I), Base);
+    Instruction *NextInsn = 0;
+    if (CS.isCall()) {
+      NextInsn = I.getNextNode();
+    } else {
+      BasicBlock *NormalDest = cast<InvokeInst>(&I)->getNormalDest();
+      if (!NormalDest->getSinglePredecessor()) {
+        // FIXME: this case is tricky, so we are just conservative here.
+        // Perhaps we need to split the edge between this BB and NormalDest,
+        // but a naive attempt to use SplitEdge leads to a crash.
+        setShadow(&I, getCleanShadow(&I));
+        setOrigin(&I, getCleanOrigin());
+        return;
+      }
+      NextInsn = NormalDest->getFirstInsertionPt();
+      assert(NextInsn &&
+             "Could not find insertion point for retval shadow load");
+    }
+    IRBuilder<> IRBAfter(NextInsn);
+    setShadow(&I, IRBAfter.CreateLoad(getShadowPtrForRetval(&I, IRBAfter),
+                                      "_msret"));
+    if (ClTrackOrigins)
+      setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter)));
+  }
+
+  void visitReturnInst(ReturnInst &I) {
+    IRBuilder<> IRB(&I);
+    if (Value *RetVal = I.getReturnValue()) {
+      // Set the shadow for the RetVal.
+      Value *Shadow = getShadow(RetVal);
+      Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
+      DEBUG(dbgs() << "Return: " << *Shadow << "\n" << *ShadowPtr << "\n");
+      IRB.CreateStore(Shadow, ShadowPtr);
+      if (ClTrackOrigins)
+        IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
+    }
+  }
+
+  void visitPHINode(PHINode &I) {
+    IRBuilder<> IRB(&I);
+    ShadowPHINodes.push_back(&I);
+    setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
+                                "_msphi_s"));
+    if (ClTrackOrigins)
+      setOrigin(&I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(),
+                                  "_msphi_o"));
+  }
+
+  void visitAllocaInst(AllocaInst &I) {
+    setShadow(&I, getCleanShadow(&I));
+    if (!ClPoisonStack) return;
+    IRBuilder<> IRB(I.getNextNode());
+    uint64_t Size = MS.TD->getTypeAllocSize(I.getAllocatedType());
+    if (ClPoisonStackWithCall) {
+      IRB.CreateCall2(MS.MsanPoisonStackFn,
+                      IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
+                      ConstantInt::get(MS.IntptrTy, Size));
+    } else {
+      Value *ShadowBase = getShadowPtr(&I, Type::getInt8PtrTy(*MS.C), IRB);
+      IRB.CreateMemSet(ShadowBase, IRB.getInt8(ClPoisonStackPattern),
+                       Size, I.getAlignment());
+    }
+
+    if (ClTrackOrigins) {
+      setOrigin(&I, getCleanOrigin());
+      SmallString<2048> StackDescriptionStorage;
+      raw_svector_ostream StackDescription(StackDescriptionStorage);
+      // We create a string with a description of the stack allocation and
+      // pass it into __msan_set_alloca_origin.
+      // It will be printed by the run-time if stack-originated UMR is found.
+      // The first 4 bytes of the string are set to '----' and will be replaced
+      // by __msan_va_arg_overflow_size_tls at the first call.
+      StackDescription << "----" << I.getName() << "@" << F.getName();
+      Value *Descr =
+          createPrivateNonConstGlobalForString(*F.getParent(),
+                                               StackDescription.str());
+      IRB.CreateCall3(MS.MsanSetAllocaOriginFn,
+                      IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()),
+                      ConstantInt::get(MS.IntptrTy, Size),
+                      IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy()));
+    }
+  }
+
+  void visitSelectInst(SelectInst& I) {
+    IRBuilder<> IRB(&I);
+    setShadow(&I,  IRB.CreateSelect(I.getCondition(),
+              getShadow(I.getTrueValue()), getShadow(I.getFalseValue()),
+              "_msprop"));
+    if (ClTrackOrigins)
+      setOrigin(&I, IRB.CreateSelect(I.getCondition(),
+                getOrigin(I.getTrueValue()), getOrigin(I.getFalseValue())));
+  }
+
+  void visitLandingPadInst(LandingPadInst &I) {
+    // Do nothing.
+    // See http://code.google.com/p/memory-sanitizer/issues/detail?id=1
+    setShadow(&I, getCleanShadow(&I));
+    setOrigin(&I, getCleanOrigin());
+  }
+
+  void visitGetElementPtrInst(GetElementPtrInst &I) {
+    handleShadowOr(I);
+  }
+
+  void visitExtractValueInst(ExtractValueInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *Agg = I.getAggregateOperand();
+    DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
+    Value *AggShadow = getShadow(Agg);
+    DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
+    Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
+    DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
+    setShadow(&I, ResShadow);
+    setOrigin(&I, getCleanOrigin());
+  }
+
+  void visitInsertValueInst(InsertValueInst &I) {
+    IRBuilder<> IRB(&I);
+    DEBUG(dbgs() << "InsertValue:  " << I << "\n");
+    Value *AggShadow = getShadow(I.getAggregateOperand());
+    Value *InsShadow = getShadow(I.getInsertedValueOperand());
+    DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
+    DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
+    Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
+    DEBUG(dbgs() << "   Res:        " << *Res << "\n");
+    setShadow(&I, Res);
+    setOrigin(&I, getCleanOrigin());
+  }
+
+  void dumpInst(Instruction &I) {
+    if (CallInst *CI = dyn_cast<CallInst>(&I)) {
+      errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
+    } else {
+      errs() << "ZZZ " << I.getOpcodeName() << "\n";
+    }
+    errs() << "QQQ " << I << "\n";
+  }
+
+  void visitResumeInst(ResumeInst &I) {
+    DEBUG(dbgs() << "Resume: " << I << "\n");
+    // Nothing to do here.
+  }
+
+  void visitInstruction(Instruction &I) {
+    // Everything else: stop propagating and check for poisoned shadow.
+    if (ClDumpStrictInstructions)
+      dumpInst(I);
+    DEBUG(dbgs() << "DEFAULT: " << I << "\n");
+    for (size_t i = 0, n = I.getNumOperands(); i < n; i++)
+      insertCheck(I.getOperand(i), &I);
+    setShadow(&I, getCleanShadow(&I));
+    setOrigin(&I, getCleanOrigin());
+  }
+};
+
+/// \brief AMD64-specific implementation of VarArgHelper.
+struct VarArgAMD64Helper : public VarArgHelper {
+  // An unfortunate workaround for asymmetric lowering of va_arg stuff.
+  // See a comment in visitCallSite for more details.
+  static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
+  static const unsigned AMD64FpEndOffset = 176;
+
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy;
+  Value *VAArgOverflowSize;
+
+  SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+  VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
+                    MemorySanitizerVisitor &MSV)
+    : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(0), VAArgOverflowSize(0) { }
+
+  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
+
+  ArgKind classifyArgument(Value* arg) {
+    // A very rough approximation of X86_64 argument classification rules.
+    Type *T = arg->getType();
+    if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
+      return AK_FloatingPoint;
+    if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+      return AK_GeneralPurpose;
+    if (T->isPointerTy())
+      return AK_GeneralPurpose;
+    return AK_Memory;
+  }
+
+  // For VarArg functions, store the argument shadow in an ABI-specific format
+  // that corresponds to va_list layout.
+  // We do this because Clang lowers va_arg in the frontend, and this pass
+  // only sees the low level code that deals with va_list internals.
+  // A much easier alternative (provided that Clang emits va_arg instructions)
+  // would have been to associate each live instance of va_list with a copy of
+  // MSanParamTLS, and extract shadow on va_arg() call in the argument list
+  // order.
+  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) {
+    unsigned GpOffset = 0;
+    unsigned FpOffset = AMD64GpEndOffset;
+    unsigned OverflowOffset = AMD64FpEndOffset;
+    for (CallSite::arg_iterator ArgIt = CS.arg_begin(), End = CS.arg_end();
+         ArgIt != End; ++ArgIt) {
+      Value *A = *ArgIt;
+      ArgKind AK = classifyArgument(A);
+      if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
+        AK = AK_Memory;
+      if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
+        AK = AK_Memory;
+      Value *Base;
+      switch (AK) {
+      case AK_GeneralPurpose:
+        Base = getShadowPtrForVAArgument(A, IRB, GpOffset);
+        GpOffset += 8;
+        break;
+      case AK_FloatingPoint:
+        Base = getShadowPtrForVAArgument(A, IRB, FpOffset);
+        FpOffset += 16;
+        break;
+      case AK_Memory:
+        uint64_t ArgSize = MS.TD->getTypeAllocSize(A->getType());
+        Base = getShadowPtrForVAArgument(A, IRB, OverflowOffset);
+        OverflowOffset += DataLayout::RoundUpAlignment(ArgSize, 8);
+      }
+      IRB.CreateStore(MSV.getShadow(A), Base);
+    }
+    Constant *OverflowSize =
+      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
+    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+  }
+
+  /// \brief Compute the shadow address for a given va_arg.
+  Value *getShadowPtrForVAArgument(Value *A, IRBuilder<> &IRB,
+                                   int ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(A), 0),
+                              "_msarg");
+  }
+
+  void visitVAStartInst(VAStartInst &I) {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+
+    // Unpoison the whole __va_list_tag.
+    // FIXME: magic ABI constants.
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */24, /* alignment */16, false);
+  }
+
+  void visitVACopyInst(VACopyInst &I) {
+    IRBuilder<> IRB(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+
+    // Unpoison the whole __va_list_tag.
+    // FIXME: magic ABI constants.
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */ 24, /* alignment */ 16, false);
+  }
+
+  void finalizeInstrumentation() {
+    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
+      IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+      VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+      Value *CopySize =
+        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset),
+                      VAArgOverflowSize);
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+    }
+
+    // Instrument va_start.
+    // Copy va_list shadow from the backup copy of the TLS contents.
+    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+      CallInst *OrigInst = VAStartInstrumentationList[i];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+      Value *VAListTag = OrigInst->getArgOperand(0);
+
+      Value *RegSaveAreaPtrPtr =
+        IRB.CreateIntToPtr(
+          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                        ConstantInt::get(MS.IntptrTy, 16)),
+          Type::getInt64PtrTy(*MS.C));
+      Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrPtr);
+      Value *RegSaveAreaShadowPtr =
+        MSV.getShadowPtr(RegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+      IRB.CreateMemCpy(RegSaveAreaShadowPtr, VAArgTLSCopy,
+                       AMD64FpEndOffset, 16);
+
+      Value *OverflowArgAreaPtrPtr =
+        IRB.CreateIntToPtr(
+          IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                        ConstantInt::get(MS.IntptrTy, 8)),
+          Type::getInt64PtrTy(*MS.C));
+      Value *OverflowArgAreaPtr = IRB.CreateLoad(OverflowArgAreaPtrPtr);
+      Value *OverflowArgAreaShadowPtr =
+        MSV.getShadowPtr(OverflowArgAreaPtr, IRB.getInt8Ty(), IRB);
+      Value *SrcPtr =
+        getShadowPtrForVAArgument(VAArgTLSCopy, IRB, AMD64FpEndOffset);
+      IRB.CreateMemCpy(OverflowArgAreaShadowPtr, SrcPtr, VAArgOverflowSize, 16);
+    }
+  }
+};
+
+VarArgHelper* CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
+                                 MemorySanitizerVisitor &Visitor) {
+  return new VarArgAMD64Helper(Func, Msan, Visitor);
+}
+
+}  // namespace
+
+bool MemorySanitizer::runOnFunction(Function &F) {
+  MemorySanitizerVisitor Visitor(F, *this);
+
+  // Clear out readonly/readnone attributes.
+  AttrBuilder B;
+  B.addAttribute(Attributes::ReadOnly)
+    .addAttribute(Attributes::ReadNone);
+  F.removeAttribute(AttributeSet::FunctionIndex,
+                    Attributes::get(F.getContext(), B));
+
+  return Visitor.runOnFunction();
+}
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 1fe12545d2..8f8d027dca 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -13,20 +13,20 @@
 //
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "insert-optimal-edge-profiling"
+#include "llvm/Transforms/Instrumentation.h"
+#include "MaximumSpanningTree.h"
 #include "ProfilingUtils.h"
-#include "llvm/Constants.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "MaximumSpanningTree.h"
 using namespace llvm;
 
 STATISTIC(NumEdgesInserted, "The # of edges inserted.");
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index cc27146ebc..8aefe5901c 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -45,24 +45,24 @@
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "insert-path-profiling"
 
-#include "llvm/DerivedTypes.h"
+#include "llvm/Transforms/Instrumentation.h"
 #include "ProfilingUtils.h"
 #include "llvm/Analysis/PathNumbering.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/InstrTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/TypeBuilder.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/TypeBuilder.h"
 #include <vector>
 
 #define HASH_THRESHHOLD 100000
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index d054b5e22f..f14a5d8a1e 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -21,27 +21,27 @@
 
 #define DEBUG_TYPE "tsan"
 
+#include "llvm/Transforms/Instrumentation.h"
 #include "BlackList.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Function.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Metadata.h"
 #include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
+#include "llvm/Type.h"
 
 using namespace llvm;
 
@@ -78,6 +78,7 @@ struct ThreadSanitizer : public FunctionPass {
   static char ID;  // Pass identification, replacement for typeid.
 
  private:
+  void initializeCallbacks(Module &M);
   bool instrumentLoadOrStore(Instruction *I);
   bool instrumentAtomic(Instruction *I);
   void chooseInstructionsToInstrument(SmallVectorImpl<Instruction*> &Local,
@@ -130,18 +131,8 @@ static Function *checkInterfaceFunction(Constant *FuncOrBitcast) {
   report_fatal_error("ThreadSanitizer interface function redefined");
 }
 
-bool ThreadSanitizer::doInitialization(Module &M) {
-  TD = getAnalysisIfAvailable<DataLayout>();
-  if (!TD)
-    return false;
-  BL.reset(new BlackList(ClBlackListFile));
-
-  // Always insert a call to __tsan_init into the module's CTORs.
+void ThreadSanitizer::initializeCallbacks(Module &M) {
   IRBuilder<> IRB(M.getContext());
-  Value *TsanInit = M.getOrInsertFunction("__tsan_init",
-                                          IRB.getVoidTy(), NULL);
-  appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
-
   // Initialize the callbacks.
   TsanFuncEntry = checkInterfaceFunction(M.getOrInsertFunction(
       "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), NULL));
@@ -188,6 +179,8 @@ bool ThreadSanitizer::doInitialization(Module &M) {
         NamePart = "_fetch_or";
       else if (op == AtomicRMWInst::Xor)
         NamePart = "_fetch_xor";
+      else if (op == AtomicRMWInst::Nand)
+        NamePart = "_fetch_nand";
       else
         continue;
       SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
@@ -207,6 +200,20 @@ bool ThreadSanitizer::doInitialization(Module &M) {
       "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, NULL));
   TsanAtomicSignalFence = checkInterfaceFunction(M.getOrInsertFunction(
       "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, NULL));
+}
+
+bool ThreadSanitizer::doInitialization(Module &M) {
+  TD = getAnalysisIfAvailable<DataLayout>();
+  if (!TD)
+    return false;
+  BL.reset(new BlackList(ClBlackListFile));
+
+  // Always insert a call to __tsan_init into the module's CTORs.
+  IRBuilder<> IRB(M.getContext());
+  Value *TsanInit = M.getOrInsertFunction("__tsan_init",
+                                          IRB.getVoidTy(), NULL);
+  appendToGlobalCtors(M, cast<Function>(TsanInit), 0);
+
   return true;
 }
 
@@ -297,6 +304,7 @@ static bool isAtomic(Instruction *I) {
 bool ThreadSanitizer::runOnFunction(Function &F) {
   if (!TD) return false;
   if (BL->isIn(F)) return false;
+  initializeCallbacks(*F.getParent());
   SmallVector<Instruction*, 8> RetVec;
   SmallVector<Instruction*, 8> AllLoadsAndStores;
   SmallVector<Instruction*, 8> LocalLoadsAndStores;
diff --git a/lib/Transforms/NaCl/ExpandTls.cpp b/lib/Transforms/NaCl/ExpandTls.cpp
index 8ce439c018..54299edf4b 100644
--- a/lib/Transforms/NaCl/ExpandTls.cpp
+++ b/lib/Transforms/NaCl/ExpandTls.cpp
@@ -239,8 +239,8 @@ static void rewriteTlsVars(Module &M, std::vector<VarInfo> *TlsVars,
   AttrBuilder B;
   B.addAttribute(Attributes::ReadOnly);
   B.addAttribute(Attributes::NoUnwind);
-  AttrListPtr ReadTpAttrs = AttrListPtr().addAttr(
-      M.getContext(), AttrListPtr::FunctionIndex,
+  AttributeSet ReadTpAttrs = AttributeSet().addAttr(
+      M.getContext(), AttributeSet::FunctionIndex,
       Attributes::get(M.getContext(), B));
   Constant *ReadTpFunc = M.getOrInsertTargetIntrinsic("llvm.nacl.read.tp",
                                                       ReadTpType,
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index b344952cc5..f43baf5a76 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -16,16 +16,16 @@
 
 #define DEBUG_TYPE "adce"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/InstIterator.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumRemoved, "Number of instructions removed");
diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
index cee5502656..6214e3b703 100644
--- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -27,12 +27,12 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "block-placement"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Function.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Transforms/Scalar.h"
 #include <set>
 using namespace llvm;
 
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index b608a5535e..e6abfdf581 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -15,7 +15,16 @@
 
 #define DEBUG_TYPE "codegenprepare"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DominatorInternals.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/IRBuilder.h"
@@ -23,14 +32,6 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/DominatorInternals.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -38,7 +39,6 @@
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Transforms/Utils/AddrModeMatcher.h"
@@ -194,9 +194,20 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
           WorkList.insert(*II);
     }
 
-    for (SmallPtrSet<BasicBlock*, 8>::iterator
-           I = WorkList.begin(), E = WorkList.end(); I != E; ++I)
-      DeleteDeadBlock(*I);
+    // Delete the dead blocks and any of their dead successors.
+    MadeChange |= !WorkList.empty();
+    while (!WorkList.empty()) {
+      BasicBlock *BB = *WorkList.begin();
+      WorkList.erase(BB);
+      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
+
+      DeleteDeadBlock(BB);
+      
+      for (SmallVectorImpl<BasicBlock*>::iterator
+             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
+        if (pred_begin(*II) == pred_end(*II))
+          WorkList.insert(*II);
+    }
 
     // Merge pairs of basic blocks with unconditional branches, connected by
     // a single edge.
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 369720b3dc..27efde53cd 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -20,14 +20,14 @@
 
 #define DEBUG_TYPE "constprop"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Constant.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Instruction.h"
 #include "llvm/Pass.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Support/InstIterator.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include <set>
 using namespace llvm;
 
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 3ec6f3dcc3..b5a2a25ba0 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -13,15 +13,15 @@
 
 #define DEBUG_TYPE "correlated-value-propagation"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumPhis,      "Number of phis propagated");
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index a2e074fae8..f260331c6d 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -18,12 +18,12 @@
 
 #define DEBUG_TYPE "dce"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Instruction.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
 STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 736cc05e04..124892887c 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -17,25 +17,25 @@
 
 #define DEBUG_TYPE "dse"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Constants.h"
 #include "llvm/DataLayout.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 using namespace llvm;
 
 STATISTIC(NumFastStores, "Number of stores deleted");
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index 101009dd64..6b622c73f0 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -14,18 +14,18 @@
 
 #define DEBUG_TYPE "early-cse"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/RecyclingAllocator.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/ScopedHashTable.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <deque>
 using namespace llvm;
 
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index f003e06699..1c540b240c 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -17,11 +17,6 @@
 
 #define DEBUG_TYPE "gvn"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/Hashing.h"
@@ -37,11 +32,16 @@
 #include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Assembly/Writer.h"
+#include "llvm/DataLayout.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/PatternMatch.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 6301aad610..486a349c55 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -53,8 +53,10 @@
 
 #define DEBUG_TYPE "global-merge"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Attributes.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalVariable.h"
@@ -62,10 +64,8 @@
 #include "llvm/Intrinsics.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumMerged      , "Number of globals merged");
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 310fd6147a..29f5a10e09 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -26,28 +26,28 @@
 
 #define DEBUG_TYPE "indvars"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Type.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 STATISTIC(NumWidened     , "Number of indvars widened");
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index e7ffa09f17..4a4cd705e2 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -13,28 +13,28 @@
 
 #define DEBUG_TYPE "jump-threading"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/DataLayout.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 using namespace llvm;
 
 STATISTIC(NumThreads, "Number of jumps threaded");
@@ -216,19 +216,24 @@ bool JumpThreading::runOnFunction(Function &F) {
 }
 
 /// getJumpThreadDuplicationCost - Return the cost of duplicating this block to
-/// thread across it.
-static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
+/// thread across it. Stop scanning the block when passing the threshold.
+static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
+                                             unsigned Threshold) {
   /// Ignore PHI nodes, these will be flattened when duplication happens.
   BasicBlock::const_iterator I = BB->getFirstNonPHI();
 
   // FIXME: THREADING will delete values that are just used to compute the
   // branch, so they shouldn't count against the duplication cost.
 
-
   // Sum up the cost of each instruction until we get to the terminator.  Don't
   // include the terminator because the copy won't include it.
   unsigned Size = 0;
   for (; !isa<TerminatorInst>(I); ++I) {
+
+    // Stop scanning the block if we've reached the threshold.
+    if (Size > Threshold)
+      return Size;
+
     // Debugger intrinsics don't incur code size.
     if (isa<DbgInfoIntrinsic>(I)) continue;
 
@@ -1337,7 +1342,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
     return false;
   }
 
-  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB);
+  unsigned JumpThreadCost = getJumpThreadDuplicationCost(BB, Threshold);
   if (JumpThreadCost > Threshold) {
     DEBUG(dbgs() << "  Not threading BB '" << BB->getName()
           << "' - Cost is too high: " << JumpThreadCost << "\n");
@@ -1481,7 +1486,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
     return false;
   }
 
-  unsigned DuplicationCost = getJumpThreadDuplicationCost(BB);
+  unsigned DuplicationCost = getJumpThreadDuplicationCost(BB, Threshold);
   if (DuplicationCost > Threshold) {
     DEBUG(dbgs() << "  Not duplicating BB '" << BB->getName()
           << "' - Cost is too high: " << DuplicationCost << "\n");
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 4818437c24..7ef1d34d3f 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -32,27 +32,27 @@
 
 #define DEBUG_TYPE "licm"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AliasSetTracker.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Constants.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -90,6 +90,8 @@ namespace {
       AU.addRequired<TargetLibraryInfo>();
     }
 
+    using llvm::Pass::doFinalization;
+
     bool doFinalization() {
       assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
       return false;
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 3771f5aa97..9c67e327e2 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -16,11 +16,11 @@
 
 #define DEBUG_TYPE "loop-delete"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Analysis/LoopPass.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
 STATISTIC(NumDeleted, "Number of loops deleted");
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index a44e798f12..7807e9bb4f 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -43,19 +43,20 @@
 
 #define DEBUG_TYPE "loop-idiom"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Module.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DataLayout.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/TargetTransformInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
@@ -63,16 +64,83 @@ STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
 STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
 
 namespace {
+
+  class LoopIdiomRecognize;
+
+  /// This class defines some utility functions for loop idiom recognization.
+  class LIRUtil {
+  public:
+    /// Return true iff the block contains nothing but an uncondition branch
+    /// (aka goto instruction).
+    static bool isAlmostEmpty(BasicBlock *);
+
+    static BranchInst *getBranch(BasicBlock *BB) {
+      return dyn_cast<BranchInst>(BB->getTerminator());
+    }
+
+    /// Return the condition of the branch terminating the given basic block.
+    static Value *getBrCondtion(BasicBlock *);
+
+    /// Derive the precondition block (i.e the block that guards the loop 
+    /// preheader) from the given preheader.
+    static BasicBlock *getPrecondBb(BasicBlock *PreHead);
+  };
+
+  /// This class is to recoginize idioms of population-count conducted in
+  /// a noncountable loop. Currently it only recognizes this pattern:
+  /// \code
+  ///   while(x) {cnt++; ...; x &= x - 1; ...}
+  /// \endcode
+  class NclPopcountRecognize {
+    LoopIdiomRecognize &LIR;
+    Loop *CurLoop;
+    BasicBlock *PreCondBB;
+
+    typedef IRBuilder<> IRBuilderTy;
+
+  public:
+    explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR);
+    bool recognize();
+
+  private:
+    /// Take a glimpse of the loop to see if we need to go ahead recoginizing
+    /// the idiom.
+    bool preliminaryScreen();
+
+    /// Check if the given conditional branch is based on the comparison
+    /// beween a variable and zero, and if the variable is non-zero, the
+    /// control yeilds to the loop entry. If the branch matches the behavior,
+    /// the variable involved in the comparion is returned. This function will
+    /// be called to see if the precondition and postcondition of the loop 
+    /// are in desirable form.
+    Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const;
+
+    /// Return true iff the idiom is detected in the loop. and 1) \p CntInst
+    /// is set to the instruction counting the pupulation bit. 2) \p CntPhi
+    /// is set to the corresponding phi node. 3) \p Var is set to the value
+    /// whose population bits are being counted.
+    bool detectIdiom
+      (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const;
+
+    /// Insert ctpop intrinsic function and some obviously dead instructions.
+    void transform (Instruction *CntInst, PHINode *CntPhi, Value *Var);
+
+    /// Create llvm.ctpop.* intrinsic function.
+    CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL);
+  };
+
   class LoopIdiomRecognize : public LoopPass {
     Loop *CurLoop;
     const DataLayout *TD;
     DominatorTree *DT;
     ScalarEvolution *SE;
     TargetLibraryInfo *TLI;
+    const ScalarTargetTransformInfo *STTI;
   public:
     static char ID;
     explicit LoopIdiomRecognize() : LoopPass(ID) {
       initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
+      TD = 0; DT = 0; SE = 0; TLI = 0; STTI = 0;
     }
 
     bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -110,6 +178,36 @@ namespace {
       AU.addRequired<DominatorTree>();
       AU.addRequired<TargetLibraryInfo>();
     }
+
+    const DataLayout *getDataLayout() {
+      return TD ? TD : TD=getAnalysisIfAvailable<DataLayout>();
+    }
+
+    DominatorTree *getDominatorTree() {
+      return DT ? DT : (DT=&getAnalysis<DominatorTree>());
+    }
+
+    ScalarEvolution *getScalarEvolution() {
+      return SE ? SE : (SE = &getAnalysis<ScalarEvolution>());
+    }
+
+    TargetLibraryInfo *getTargetLibraryInfo() {
+      return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>());
+    }
+
+    const ScalarTargetTransformInfo *getScalarTargetTransformInfo() {
+      if (!STTI) {
+        TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
+        if (TTI) STTI = TTI->getScalarTargetTransformInfo();
+      }
+      return STTI;
+    }
+
+    Loop *getLoop() const { return CurLoop; }
+
+  private:
+    bool runOnNoncountableLoop();
+    bool runOnCountableLoop();
   };
 }
 
@@ -172,24 +270,393 @@ static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
       deleteDeadInstruction(I, SE, TLI);
 }
 
-bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
-  CurLoop = L;
+//===----------------------------------------------------------------------===//
+//
+//          Implementation of LIRUtil
+//
+//===----------------------------------------------------------------------===//
 
-  // If the loop could not be converted to canonical form, it must have an
-  // indirectbr in it, just give up.
-  if (!L->getLoopPreheader())
+// This fucntion will return true iff the given block contains nothing but goto. 
+// A typical usage of this function is to check if the preheader fucntion is 
+// "almost" empty such that generated intrinsic function can be moved across 
+// preheader and to be placed at the end of the preconditiona block without 
+// concerning of breaking data dependence.
+bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
+  if (BranchInst *Br = getBranch(BB)) {
+    return Br->isUnconditional() && BB->size() == 1;
+  }
+  return false;
+}
+
+Value *LIRUtil::getBrCondtion(BasicBlock *BB) {
+  BranchInst *Br = getBranch(BB);
+  return Br ? Br->getCondition() : 0;
+}
+
+BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
+  if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
+    BranchInst *Br = getBranch(BB);
+    return Br && Br->isConditional() ? BB : 0;
+  }
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+//          Implementation of NclPopcountRecognize
+//
+//===----------------------------------------------------------------------===//
+
+NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
+  LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) {
+}
+
+bool NclPopcountRecognize::preliminaryScreen() {
+  const ScalarTargetTransformInfo *STTI = LIR.getScalarTargetTransformInfo();
+  if (STTI->getPopcntHwSupport(32) != ScalarTargetTransformInfo::Fast)
     return false;
 
-  // Disable loop idiom recognition if the function's name is a common idiom.
-  StringRef Name = L->getHeader()->getParent()->getName();
-  if (Name == "memset" || Name == "memcpy")
+  // Counting population are usually conducted by few arithmetic instrutions.
+  // Such instructions can be easilly "absorbed" by vacant slots in a
+  // non-compact loop. Therefore, recognizing popcount idiom only makes sense
+  // in a compact loop.
+
+  // Give up if the loop has multiple blocks or multiple backedges.
+  if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1)
     return false;
 
-  // The trip count of the loop must be analyzable.
-  SE = &getAnalysis<ScalarEvolution>();
-  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
+  BasicBlock *LoopBody = *(CurLoop->block_begin());
+  if (LoopBody->size() >= 20) {
+    // The loop is too big, bail out.
+    return false;
+  }
+
+  // It should have a preheader containing nothing but a goto instruction.
+  BasicBlock *PreHead = CurLoop->getLoopPreheader();
+  if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead))
+    return false;
+
+  // It should have a precondition block where the generated popcount instrinsic
+  // function will be inserted.
+  PreCondBB = LIRUtil::getPrecondBb(PreHead);
+  if (!PreCondBB)
+    return false;
+ 
+  return true;
+}
+
+Value *NclPopcountRecognize::matchCondition (BranchInst *Br,
+                                             BasicBlock *LoopEntry) const {
+  if (!Br || !Br->isConditional())
+    return 0;
+
+  ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
+  if (!Cond)
+    return 0;
+
+  ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
+  if (!CmpZero || !CmpZero->isZero())
+    return 0;
+
+  ICmpInst::Predicate Pred = Cond->getPredicate();
+  if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
+      (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
+    return Cond->getOperand(0);
+
+  return 0;
+}
+
+bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
+                                       PHINode *&CntPhi,
+                                       Value *&Var) const {
+  // Following code tries to detect this idiom:
+  //
+  //    if (x0 != 0)
+  //      goto loop-exit // the precondition of the loop
+  //    cnt0 = init-val;
+  //    do {
+  //       x1 = phi (x0, x2);
+  //       cnt1 = phi(cnt0, cnt2);
+  //
+  //       cnt2 = cnt1 + 1;
+  //        ...
+  //       x2 = x1 & (x1 - 1);
+  //        ...
+  //    } while(x != 0);
+  //
+  // loop-exit:
+  //
+
+  // step 1: Check to see if the look-back branch match this pattern:
+  //    "if (a!=0) goto loop-entry".
+  BasicBlock *LoopEntry;
+  Instruction *DefX2, *CountInst;
+  Value *VarX1, *VarX0;
+  PHINode *PhiX, *CountPhi;
+
+  DefX2 = CountInst = 0;
+  VarX1 = VarX0 = 0;
+  PhiX = CountPhi = 0;
+  LoopEntry = *(CurLoop->block_begin());
+
+  // step 1: Check if the loop-back branch is in desirable form.
+  {
+    if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry))
+      DefX2 = dyn_cast<Instruction>(T);
+    else
+      return false;
+  }
+
+  // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)"
+  {
+    if (DefX2->getOpcode() != Instruction::And)
+      return false;
+
+    BinaryOperator *SubOneOp;
+
+    if ((SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(0))))
+      VarX1 = DefX2->getOperand(1);
+    else {
+      VarX1 = DefX2->getOperand(0);
+      SubOneOp = dyn_cast<BinaryOperator>(DefX2->getOperand(1));
+    }
+    if (!SubOneOp)
+      return false;
+
+    Instruction *SubInst = cast<Instruction>(SubOneOp);
+    ConstantInt *Dec = dyn_cast<ConstantInt>(SubInst->getOperand(1));
+    if (!Dec ||
+        !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) ||
+          (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) {
+      return false;
+    }
+  }
+
+  // step 3: Check the recurrence of variable X
+  {
+    PhiX = dyn_cast<PHINode>(VarX1);
+    if (!PhiX ||
+        (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) {
+      return false;
+    }
+  }
+
+  // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1
+  {
+    CountInst = NULL;
+    for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
+           IterE = LoopEntry->end(); Iter != IterE; Iter++) {
+      Instruction *Inst = Iter;
+      if (Inst->getOpcode() != Instruction::Add)
+        continue;
+
+      ConstantInt *Inc = dyn_cast<ConstantInt>(Inst->getOperand(1));
+      if (!Inc || !Inc->isOne())
+        continue;
+
+      PHINode *Phi = dyn_cast<PHINode>(Inst->getOperand(0));
+      if (!Phi || Phi->getParent() != LoopEntry)
+        continue;
+
+      // Check if the result of the instruction is live of the loop.
+      bool LiveOutLoop = false;
+      for (Value::use_iterator I = Inst->use_begin(), E = Inst->use_end();
+             I != E;  I++) {
+        if ((cast<Instruction>(*I))->getParent() != LoopEntry) {
+          LiveOutLoop = true; break;
+        }
+      }
+
+      if (LiveOutLoop) {
+        CountInst = Inst;
+        CountPhi = Phi;
+        break;
+      }
+    }
+
+    if (!CountInst)
+      return false;
+  }
+
+  // step 5: check if the precondition is in this form:
+  //   "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;"
+  {
+    BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
+    Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader());
+    if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1))
+      return false;
+
+    CntInst = CountInst;
+    CntPhi = CountPhi;
+    Var = T;
+  }
+
+  return true;
+}
+
+void NclPopcountRecognize::transform(Instruction *CntInst,
+                                     PHINode *CntPhi, Value *Var) {
+
+  ScalarEvolution *SE = LIR.getScalarEvolution();
+  TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo();
+  BasicBlock *PreHead = CurLoop->getLoopPreheader();
+  BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB);
+  const DebugLoc DL = CntInst->getDebugLoc();
+
+  // Assuming before transformation, the loop is following:
+  //  if (x) // the precondition
+  //     do { cnt++; x &= x - 1; } while(x);
+ 
+  // Step 1: Insert the ctpop instruction at the end of the precondition block
+  IRBuilderTy Builder(PreCondBr);
+  Value *PopCnt, *PopCntZext, *NewCount, *TripCnt;
+  {
+    PopCnt = createPopcntIntrinsic(Builder, Var, DL);
+    NewCount = PopCntZext =
+      Builder.CreateZExtOrTrunc(PopCnt, cast<IntegerType>(CntPhi->getType()));
+
+    if (NewCount != PopCnt)
+      (cast<Instruction>(NewCount))->setDebugLoc(DL);
+
+    // TripCnt is exactly the number of iterations the loop has
+    TripCnt = NewCount;
+
+    // If the popoulation counter's initial value is not zero, insert Add Inst.
+    Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead);
+    ConstantInt *InitConst = dyn_cast<ConstantInt>(CntInitVal);
+    if (!InitConst || !InitConst->isZero()) {
+      NewCount = Builder.CreateAdd(NewCount, CntInitVal);
+      (cast<Instruction>(NewCount))->setDebugLoc(DL);
+    }
+  }
+
+  // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to
+  //   "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic
+  //   function would be partial dead code, and downstream passes will drag
+  //   it back from the precondition block to the preheader.
+  {
+    ICmpInst *PreCond = cast<ICmpInst>(PreCondBr->getCondition());
+
+    Value *Opnd0 = PopCntZext;
+    Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0);
+    if (PreCond->getOperand(0) != Var)
+      std::swap(Opnd0, Opnd1);
+
+    ICmpInst *NewPreCond =
+      cast<ICmpInst>(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1));
+    PreCond->replaceAllUsesWith(NewPreCond);
+
+    deleteDeadInstruction(PreCond, *SE, TLI);
+  }
+
+  // Step 3: Note that the population count is exactly the trip count of the
+  // loop in question, which enble us to to convert the loop from noncountable
+  // loop into a countable one. The benefit is twofold:
+  //
+  //  - If the loop only counts population, the entire loop become dead after
+  //    the transformation. It is lots easier to prove a countable loop dead
+  //    than to prove a noncountable one. (In some C dialects, a infite loop
+  //    isn't dead even if it computes nothing useful. In general, DCE needs
+  //    to prove a noncountable loop finite before safely delete it.)
+  //
+  //  - If the loop also performs something else, it remains alive.
+  //    Since it is transformed to countable form, it can be aggressively
+  //    optimized by some optimizations which are in general not applicable
+  //    to a noncountable loop.
+  //
+  // After this step, this loop (conceptually) would look like following:
+  //   newcnt = __builtin_ctpop(x);
+  //   t = newcnt;
+  //   if (x)
+  //     do { cnt++; x &= x-1; t--) } while (t > 0);
+  BasicBlock *Body = *(CurLoop->block_begin());
+  {
+    BranchInst *LbBr = LIRUtil::getBranch(Body);
+    ICmpInst *LbCond = cast<ICmpInst>(LbBr->getCondition());
+    Type *Ty = TripCnt->getType();
+
+    PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin());
+
+    Builder.SetInsertPoint(LbCond);
+    Value *Opnd1 = cast<Value>(TcPhi);
+    Value *Opnd2 = cast<Value>(ConstantInt::get(Ty, 1));
+    Instruction *TcDec =
+      cast<Instruction>(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true));
+
+    TcPhi->addIncoming(TripCnt, PreHead);
+    TcPhi->addIncoming(TcDec, Body);
+
+    CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ?
+      CmpInst::ICMP_UGT : CmpInst::ICMP_SLE;
+    LbCond->setPredicate(Pred);
+    LbCond->setOperand(0, TcDec);
+    LbCond->setOperand(1, cast<Value>(ConstantInt::get(Ty, 0)));
+  }
+
+  // Step 4: All the references to the original population counter outside
+  //  the loop are replaced with the NewCount -- the value returned from
+  //  __builtin_ctpop().
+  {
+    SmallVector<Value *, 4> CntUses;
+    for (Value::use_iterator I = CntInst->use_begin(), E = CntInst->use_end();
+         I != E; I++) {
+      if (cast<Instruction>(*I)->getParent() != Body)
+        CntUses.push_back(*I);
+    }
+    for (unsigned Idx = 0; Idx < CntUses.size(); Idx++) {
+      (cast<Instruction>(CntUses[Idx]))->replaceUsesOfWith(CntInst, NewCount);
+    }
+  }
+
+  // step 5: Forget the "non-computable" trip-count SCEV associated with the
+  //   loop. The loop would otherwise not be deleted even if it becomes empty.
+  SE->forgetLoop(CurLoop);
+}
+
+CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder, 
+                                                      Value *Val, DebugLoc DL) {
+  Value *Ops[] = { Val };
+  Type *Tys[] = { Val->getType() };
+
+  Module *M = (*(CurLoop->block_begin()))->getParent()->getParent();
+  Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys);
+  CallInst *CI = IRBuilder.CreateCall(Func, Ops);
+  CI->setDebugLoc(DL);
+
+  return CI;
+}
+
+/// recognize - detect population count idiom in a non-countable loop. If
+///   detected, transform the relevant code to popcount intrinsic function
+///   call, and return true; otherwise, return false.
+bool NclPopcountRecognize::recognize() {
+
+  if (!LIR.getScalarTargetTransformInfo())
     return false;
-  const SCEV *BECount = SE->getBackedgeTakenCount(L);
+
+  LIR.getScalarEvolution();
+
+  if (!preliminaryScreen())
+    return false;
+
+  Instruction *CntInst;
+  PHINode *CntPhi;
+  Value *Val;
+  if (!detectIdiom(CntInst, CntPhi, Val))
+    return false;
+
+  transform(CntInst, CntPhi, Val);
+  return true;
+}
+
+//===----------------------------------------------------------------------===//
+//
+//          Implementation of LoopIdiomRecognize
+//
+//===----------------------------------------------------------------------===//
+
+bool LoopIdiomRecognize::runOnCountableLoop() {
+  const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
   if (isa<SCEVCouldNotCompute>(BECount)) return false;
 
   // If this loop executes exactly one time, then it should be peeled, not
@@ -199,24 +666,27 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
       return false;
 
   // We require target data for now.
-  TD = getAnalysisIfAvailable<DataLayout>();
-  if (TD == 0) return false;
+  if (!getDataLayout())
+    return false;
+
+  getDominatorTree();
 
-  DT = &getAnalysis<DominatorTree>();
   LoopInfo &LI = getAnalysis<LoopInfo>();
   TLI = &getAnalysis<TargetLibraryInfo>();
 
+  getTargetLibraryInfo();
+
   SmallVector<BasicBlock*, 8> ExitBlocks;
   CurLoop->getUniqueExitBlocks(ExitBlocks);
 
   DEBUG(dbgs() << "loop-idiom Scanning: F["
-               << L->getHeader()->getParent()->getName()
-               << "] Loop %" << L->getHeader()->getName() << "\n");
+               << CurLoop->getHeader()->getParent()->getName()
+               << "] Loop %" << CurLoop->getHeader()->getName() << "\n");
 
   bool MadeChange = false;
   // Scan all the blocks in the loop that are not in subloops.
-  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
-       ++BI) {
+  for (Loop::block_iterator BI = CurLoop->block_begin(),
+         E = CurLoop->block_end(); BI != E; ++BI) {
     // Ignore blocks in subloops.
     if (LI.getLoopFor(*BI) != CurLoop)
       continue;
@@ -226,6 +696,33 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
   return MadeChange;
 }
 
+bool LoopIdiomRecognize::runOnNoncountableLoop() {
+  NclPopcountRecognize Popcount(*this);
+  if (Popcount.recognize())
+    return true;
+
+  return false;
+}
+
+bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
+  CurLoop = L;
+
+  // If the loop could not be converted to canonical form, it must have an
+  // indirectbr in it, just give up.
+  if (!L->getLoopPreheader())
+    return false;
+
+  // Disable loop idiom recognition if the function's name is a common idiom.
+  StringRef Name = L->getHeader()->getParent()->getName();
+  if (Name == "memset" || Name == "memcpy")
+    return false;
+
+  SE = &getAnalysis<ScalarEvolution>();
+  if (SE->hasLoopInvariantBackedgeTakenCount(L))
+    return runOnCountableLoop();
+  return runOnNoncountableLoop();
+}
+
 /// runOnLoopBlock - Process the specified block, which lives in a counted loop
 /// with the specified backedge count.  This block is known to be in the current
 /// loop and not in any subloops.
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 558f62e6b4..10ba22434a 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -12,17 +12,17 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "loop-instsimplify"
-#include "llvm/Instructions.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/DataLayout.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 STATISTIC(NumSimplified, "Number of redundant instructions simplified");
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index abe07aa9d3..249baf5164 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -13,20 +13,20 @@
 
 #define DEBUG_TYPE "loop-rotate"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Function.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
 #define MAX_HEADER_SIZE 16
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 958348d9fa..d571ba3fe0 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -54,27 +54,27 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "loop-reduce"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/AddressingMode.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Analysis/IVUsers.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/IVUsers.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Assembly/Writer.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 0d781ac977..2b15528411 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -13,16 +13,16 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "loop-unroll"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/DataLayout.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/DataLayout.h"
 #include <climits>
 
 using namespace llvm;
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 047b43eb84..d41da4a9a9 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -28,25 +28,25 @@
 
 #define DEBUG_TYPE "loop-unswitch"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <map>
 #include <set>
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 517657cf52..26b6269f42 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -14,20 +14,20 @@
 
 #define DEBUG_TYPE "memcpyopt"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DataLayout.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include <list>
diff --git a/lib/Transforms/Scalar/NaClCcRewrite.cpp b/lib/Transforms/Scalar/NaClCcRewrite.cpp
index 4ac361835d..9109e22592 100644
--- a/lib/Transforms/Scalar/NaClCcRewrite.cpp
+++ b/lib/Transforms/Scalar/NaClCcRewrite.cpp
@@ -596,7 +596,7 @@ void UpdateFunctionSignature(Function &F,
   Attributes fattr = F.getAttributes().getFnAttributes();
   if (fattr.hasAttributes())
     new_attributes_vec.push_back(AttributeWithIndex::get(~0, fattr));
-  F.setAttributes(AttrListPtr::get(F.getContext(), new_attributes_vec));
+  F.setAttributes(AttributeSet::get(F.getContext(), new_attributes_vec));
 }
 
 
@@ -805,7 +805,7 @@ void ExtractOperandsAndAttributesFromCallInst(
   std::vector<Value*>& operands,
   std::vector<Attributes>& attributes) {
 
-  AttrListPtr PAL = call->getAttributes();
+  AttributeSet PAL = call->getAttributes();
   // last operand is: function
   for (size_t i = 0; i <  call->getNumOperands() - 1; ++i) {
     operands.push_back(call->getArgOperand(i));
@@ -819,7 +819,7 @@ void ExtractOperandsAndAttributesFromeInvokeInst(
   InvokeInst* call,
   std::vector<Value*>& operands,
   std::vector<Attributes>& attributes) {
-  AttrListPtr PAL = call->getAttributes();
+  AttributeSet PAL = call->getAttributes();
   // last three operands are: function, bb-normal, bb-exception
   for (size_t i = 0; i <  call->getNumOperands() - 3; ++i) {
     operands.push_back(call->getArgOperand(i));
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index dfdf50549d..ce397658bf 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -29,9 +29,9 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "objc-arc"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
 // A handy option to enable/disable all optimizations in this file.
@@ -132,12 +132,12 @@ namespace {
 // ARC Utilities.
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/Module.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Transforms/Utils/Local.h"
 
 namespace {
   /// InstructionClass - A simple classification for instructions.
@@ -660,9 +660,9 @@ static bool DoesObjCBlockEscape(const Value *BlockPtr) {
 // ARC AliasAnalysis.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Pass.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
 
 namespace {
   /// ObjCARCAliasAnalysis - This is a simple alias analysis
@@ -912,8 +912,8 @@ bool ObjCARCExpand::runOnFunction(Function &F) {
 // ARC autorelease pool elimination.
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Constants.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Constants.h"
 
 namespace {
   /// ObjCARCAPElim - Autorelease pool elimination.
@@ -1093,10 +1093,10 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
 
 // TODO: Delete release+retain pairs (rare).
 
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallPtrSet.h"
 
 STATISTIC(NumNoops,       "Number of no-op objc calls eliminated");
 STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
@@ -1788,8 +1788,8 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
     Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
     Type *Params[] = { I8X };
     FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
-    AttrListPtr Attributes =
-      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+    AttributeSet Attributes =
+      AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
                             Attributes::get(C, Attributes::NoUnwind));
     RetainRVCallee =
       M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
@@ -1804,8 +1804,8 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
     Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
     Type *Params[] = { I8X };
     FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
-    AttrListPtr Attributes =
-      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+    AttributeSet Attributes =
+      AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
                             Attributes::get(C, Attributes::NoUnwind));
     AutoreleaseRVCallee =
       M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
@@ -1818,8 +1818,8 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
   if (!ReleaseCallee) {
     LLVMContext &C = M->getContext();
     Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
-    AttrListPtr Attributes =
-      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+    AttributeSet Attributes =
+      AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
                             Attributes::get(C, Attributes::NoUnwind));
     ReleaseCallee =
       M->getOrInsertFunction(
@@ -1834,8 +1834,8 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) {
   if (!RetainCallee) {
     LLVMContext &C = M->getContext();
     Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
-    AttrListPtr Attributes =
-      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+    AttributeSet Attributes =
+      AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
                             Attributes::get(C, Attributes::NoUnwind));
     RetainCallee =
       M->getOrInsertFunction(
@@ -1856,7 +1856,7 @@ Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) {
       M->getOrInsertFunction(
         "objc_retainBlock",
         FunctionType::get(Params[0], Params, /*isVarArg=*/false),
-        AttrListPtr());
+        AttributeSet());
   }
   return RetainBlockCallee;
 }
@@ -1865,8 +1865,8 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
   if (!AutoreleaseCallee) {
     LLVMContext &C = M->getContext();
     Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
-    AttrListPtr Attributes =
-      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+    AttributeSet Attributes =
+      AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
                             Attributes::get(C, Attributes::NoUnwind));
     AutoreleaseCallee =
       M->getOrInsertFunction(
@@ -3756,9 +3756,9 @@ void ObjCARCOpt::releaseMemory() {
 // TODO: ObjCARCContract could insert PHI nodes when uses aren't
 // dominated by single calls.
 
-#include "llvm/Operator.h"
-#include "llvm/InlineAsm.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Operator.h"
 
 STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
 
@@ -3840,8 +3840,8 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
     Type *I8XX = PointerType::getUnqual(I8X);
     Type *Params[] = { I8XX, I8X };
 
-    AttrListPtr Attributes = AttrListPtr()
-      .addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+    AttributeSet Attributes = AttributeSet()
+      .addAttr(M->getContext(), AttributeSet::FunctionIndex,
                Attributes::get(C, Attributes::NoUnwind))
       .addAttr(M->getContext(), 1, Attributes::get(C, Attributes::NoCapture));
 
@@ -3860,8 +3860,8 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
     Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
     Type *Params[] = { I8X };
     FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
-    AttrListPtr Attributes =
-      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+    AttributeSet Attributes =
+      AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
                             Attributes::get(C, Attributes::NoUnwind));
     RetainAutoreleaseCallee =
       M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
@@ -3875,8 +3875,8 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
     Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
     Type *Params[] = { I8X };
     FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
-    AttrListPtr Attributes =
-      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+    AttributeSet Attributes =
+      AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex,
                             Attributes::get(C, Attributes::NoUnwind));
     RetainAutoreleaseRVCallee =
       M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 377c07020d..569439aaf4 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -22,7 +22,12 @@
 
 #define DEBUG_TYPE "reassociate"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
@@ -30,16 +35,11 @@
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Assembly/Writer.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index ea1de63de7..5524e01230 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -18,15 +18,15 @@
 
 #define DEBUG_TYPE "reg2mem"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/BasicBlock.h"
 #include "llvm/Function.h"
+#include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <list>
 using namespace llvm;
 
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 686520e724..28aaddc50e 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -19,26 +19,26 @@
 
 #define DEBUG_TYPE "sccp"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/InstVisitor.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 8284e144b2..1c220ca0f6 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -25,33 +25,34 @@
 
 #define DEBUG_TYPE "sroa"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/PtrUseVisitor.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
 #include "llvm/DIBuilder.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/IRBuilder.h"
+#include "llvm/InstVisitor.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Operator.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -404,106 +405,17 @@ private:
 };
 }
 
-template <typename DerivedT, typename RetT>
-class AllocaPartitioning::BuilderBase
-    : public InstVisitor<DerivedT, RetT> {
-public:
-  BuilderBase(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
-      : TD(TD),
-        AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())),
-        P(P) {
-    enqueueUsers(AI, 0);
-  }
-
-protected:
-  const DataLayout &TD;
-  const uint64_t AllocSize;
-  AllocaPartitioning &P;
-
-  SmallPtrSet<Use *, 8> VisitedUses;
-
-  struct OffsetUse {
-    Use *U;
-    int64_t Offset;
-  };
-  SmallVector<OffsetUse, 8> Queue;
-
-  // The active offset and use while visiting.
-  Use *U;
-  int64_t Offset;
-
-  void enqueueUsers(Instruction &I, int64_t UserOffset) {
-    for (Value::use_iterator UI = I.use_begin(), UE = I.use_end();
-         UI != UE; ++UI) {
-      if (VisitedUses.insert(&UI.getUse())) {
-        OffsetUse OU = { &UI.getUse(), UserOffset };
-        Queue.push_back(OU);
-      }
-    }
-  }
-
-  bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) {
-    GEPOffset = Offset;
-    for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI);
-         GTI != GTE; ++GTI) {
-      ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
-      if (!OpC)
-        return false;
-      if (OpC->isZero())
-        continue;
-
-      // Handle a struct index, which adds its field offset to the pointer.
-      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
-        unsigned ElementIdx = OpC->getZExtValue();
-        const StructLayout *SL = TD.getStructLayout(STy);
-        uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
-        // Check that we can continue to model this GEP in a signed 64-bit offset.
-        if (ElementOffset > INT64_MAX ||
-            (GEPOffset >= 0 &&
-             ((uint64_t)GEPOffset + ElementOffset) > INT64_MAX)) {
-          DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding "
-                       << "what can be represented in an int64_t!\n"
-                       << "  alloca: " << P.AI << "\n");
-          return false;
-        }
-        if (GEPOffset < 0)
-          GEPOffset = ElementOffset + (uint64_t)-GEPOffset;
-        else
-          GEPOffset += ElementOffset;
-        continue;
-      }
-
-      APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits());
-      Index *= APInt(Index.getBitWidth(),
-                     TD.getTypeAllocSize(GTI.getIndexedType()));
-      Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset,
-                     /*isSigned*/true);
-      // Check if the result can be stored in our int64_t offset.
-      if (!Index.isSignedIntN(sizeof(GEPOffset) * 8)) {
-        DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding "
-                     << "what can be represented in an int64_t!\n"
-                     << "  alloca: " << P.AI << "\n");
-        return false;
-      }
-
-      GEPOffset = Index.getSExtValue();
-    }
-    return true;
+static Value *foldSelectInst(SelectInst &SI) {
+  // If the condition being selected on is a constant or the same value is
+  // being selected between, fold the select. Yes this does (rarely) happen
+  // early on.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
+    return SI.getOperand(1+CI->isZero());
+  if (SI.getOperand(1) == SI.getOperand(2)) {
+    return SI.getOperand(1);
   }
-
-  Value *foldSelectInst(SelectInst &SI) {
-    // If the condition being selected on is a constant or the same value is
-    // being selected between, fold the select. Yes this does (rarely) happen
-    // early on.
-    if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
-      return SI.getOperand(1+CI->isZero());
-    if (SI.getOperand(1) == SI.getOperand(2)) {
-      assert(*U == SI.getOperand(1));
-      return SI.getOperand(1);
-    }
-    return 0;
-  }
-};
+  return 0;
+}
 
 /// \brief Builder for the alloca partitioning.
 ///
@@ -511,60 +423,38 @@ protected:
 /// of an alloca and splitting the partitions for each load and store at each
 /// offset.
 class AllocaPartitioning::PartitionBuilder
-    : public BuilderBase<PartitionBuilder, bool> {
-  friend class InstVisitor<PartitionBuilder, bool>;
+    : public PtrUseVisitor<PartitionBuilder> {
+  friend class PtrUseVisitor<PartitionBuilder>;
+  friend class InstVisitor<PartitionBuilder>;
+  typedef PtrUseVisitor<PartitionBuilder> Base;
+
+  const uint64_t AllocSize;
+  AllocaPartitioning &P;
 
   SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap;
 
 public:
-  PartitionBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
-      : BuilderBase<PartitionBuilder, bool>(TD, AI, P) {}
-
-  /// \brief Run the builder over the allocation.
-  bool operator()() {
-    // Note that we have to re-evaluate size on each trip through the loop as
-    // the queue grows at the tail.
-    for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) {
-      U = Queue[Idx].U;
-      Offset = Queue[Idx].Offset;
-      if (!visit(cast<Instruction>(U->getUser())))
-        return false;
-    }
-    return true;
-  }
+  PartitionBuilder(const DataLayout &DL, AllocaInst &AI, AllocaPartitioning &P)
+      : PtrUseVisitor<PartitionBuilder>(DL),
+        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())),
+        P(P) {}
 
 private:
-  bool markAsEscaping(Instruction &I) {
-    P.PointerEscapingInstr = &I;
-    return false;
-  }
-
-  void insertUse(Instruction &I, int64_t Offset, uint64_t Size,
+  void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
                  bool IsSplittable = false) {
-    // Completely skip uses which have a zero size or don't overlap the
-    // allocation.
-    if (Size == 0 ||
-        (Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
-        (Offset < 0 && (uint64_t)-Offset >= Size)) {
+    // Completely skip uses which have a zero size or start either before or
+    // past the end of the allocation.
+    if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize)) {
       DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
-                   << " which starts past the end of the " << AllocSize
-                   << " byte alloca:\n"
+                   << " which has zero size or starts outside of the "
+                   << AllocSize << " byte alloca:\n"
                    << "    alloca: " << P.AI << "\n"
                    << "       use: " << I << "\n");
       return;
     }
 
-    // Clamp the start to the beginning of the allocation.
-    if (Offset < 0) {
-      DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
-                   << " to start at the beginning of the alloca:\n"
-                   << "    alloca: " << P.AI << "\n"
-                   << "       use: " << I << "\n");
-      Size -= (uint64_t)-Offset;
-      Offset = 0;
-    }
-
-    uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size;
+    uint64_t BeginOffset = Offset.getZExtValue();
+    uint64_t EndOffset = BeginOffset + Size;
 
     // Clamp the end offset to the end of the allocation. Note that this is
     // formulated to handle even the case where "BeginOffset + Size" overflows.
@@ -585,9 +475,9 @@ private:
     P.Partitions.push_back(New);
   }
 
-  bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset,
+  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
                          bool IsVolatile) {
-    uint64_t Size = TD.getTypeStoreSize(Ty);
+    uint64_t Size = DL.getTypeStoreSize(Ty);
 
     // If this memory access can be shown to *statically* extend outside the
     // bounds of of the allocation, it's behavior is undefined, so simply
@@ -596,15 +486,15 @@ private:
     // risk of overflow.
     // FIXME: We should instead consider the pointer to have escaped if this
     // function is being instrumented for addressing bugs or race conditions.
-    if (Offset < 0 || (uint64_t)Offset >= AllocSize ||
-        Size > (AllocSize - (uint64_t)Offset)) {
+    if (Offset.isNegative() || Size > AllocSize ||
+        Offset.ugt(AllocSize - Size)) {
       DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
                    << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
                    << " which extends past the end of the " << AllocSize
                    << " byte alloca:\n"
                    << "    alloca: " << P.AI << "\n"
                    << "       use: " << I << "\n");
-      return true;
+      return;
     }
 
     // We allow splitting of loads and stores where the type is an integer type
@@ -615,54 +505,61 @@ private:
       IsSplittable = !IsVolatile && ITy->getBitWidth() == AllocSize*8;
 
     insertUse(I, Offset, Size, IsSplittable);
-    return true;
-  }
-
-  bool visitBitCastInst(BitCastInst &BC) {
-    enqueueUsers(BC, Offset);
-    return true;
-  }
-
-  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
-    int64_t GEPOffset;
-    if (!computeConstantGEPOffset(GEPI, GEPOffset))
-      return markAsEscaping(GEPI);
-
-    enqueueUsers(GEPI, GEPOffset);
-    return true;
   }
 
-  bool visitLoadInst(LoadInst &LI) {
+  void visitLoadInst(LoadInst &LI) {
     assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
            "All simple FCA loads should have been pre-split");
+
+    if (!IsOffsetKnown)
+      return PI.setAborted(&LI);
+
     return handleLoadOrStore(LI.getType(), LI, Offset, LI.isVolatile());
   }
 
-  bool visitStoreInst(StoreInst &SI) {
+  void visitStoreInst(StoreInst &SI) {
     Value *ValOp = SI.getValueOperand();
     if (ValOp == *U)
-      return markAsEscaping(SI);
+      return PI.setEscapedAndAborted(&SI);
+    if (!IsOffsetKnown)
+      return PI.setAborted(&SI);
 
     assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
            "All simple FCA stores should have been pre-split");
-    return handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
+    handleLoadOrStore(ValOp->getType(), SI, Offset, SI.isVolatile());
   }
 
 
-  bool visitMemSetInst(MemSetInst &II) {
+  void visitMemSetInst(MemSetInst &II) {
     assert(II.getRawDest() == *U && "Pointer use is not the destination?");
     ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
-    uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
-    insertUse(II, Offset, Size, Length);
-    return true;
+    if ((Length && Length->getValue() == 0) ||
+        (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+      // Zero-length mem transfer intrinsics can be ignored entirely.
+      return;
+
+    if (!IsOffsetKnown)
+      return PI.setAborted(&II);
+
+    insertUse(II, Offset,
+              Length ? Length->getLimitedValue()
+                     : AllocSize - Offset.getLimitedValue(),
+              (bool)Length);
   }
 
-  bool visitMemTransferInst(MemTransferInst &II) {
+  void visitMemTransferInst(MemTransferInst &II) {
     ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
-    uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
-    if (!Size)
+    if ((Length && Length->getValue() == 0) ||
+        (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
       // Zero-length mem transfer intrinsics can be ignored entirely.
-      return true;
+      return;
+
+    if (!IsOffsetKnown)
+      return PI.setAborted(&II);
+
+    uint64_t RawOffset = Offset.getLimitedValue();
+    uint64_t Size = Length ? Length->getLimitedValue()
+                           : AllocSize - RawOffset;
 
     MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
 
@@ -670,12 +567,12 @@ private:
     Offsets.IsSplittable = Length;
 
     if (*U == II.getRawDest()) {
-      Offsets.DestBegin = Offset;
-      Offsets.DestEnd = Offset + Size;
+      Offsets.DestBegin = RawOffset;
+      Offsets.DestEnd = RawOffset + Size;
     }
     if (*U == II.getRawSource()) {
-      Offsets.SourceBegin = Offset;
-      Offsets.SourceEnd = Offset + Size;
+      Offsets.SourceBegin = RawOffset;
+      Offsets.SourceEnd = RawOffset + Size;
     }
 
     // If we have set up end offsets for both the source and the destination,
@@ -688,7 +585,7 @@ private:
       // In that case, we can completely elide the transfer.
       if (!II.isVolatile() && Offsets.SourceBegin == Offsets.DestBegin) {
         P.Partitions[PrevIdx].kill();
-        return true;
+        return;
       }
 
       // Otherwise we have an offset transfer within the same alloca. We can't
@@ -701,7 +598,7 @@ private:
 
       // For non-volatile transfers this is a no-op.
       if (!II.isVolatile())
-        return true;
+        return;
 
       // Otherwise just suppress splitting.
       Offsets.IsSplittable = false;
@@ -721,23 +618,25 @@ private:
              "Already have intrinsic in map but haven't seen both ends");
       (void)Inserted;
     }
-
-    return true;
   }
 
   // Disable SRoA for any intrinsics except for lifetime invariants.
   // FIXME: What about debug instrinsics? This matches old behavior, but
   // doesn't make sense.
-  bool visitIntrinsicInst(IntrinsicInst &II) {
+  void visitIntrinsicInst(IntrinsicInst &II) {
+    if (!IsOffsetKnown)
+      return PI.setAborted(&II);
+
     if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
         II.getIntrinsicID() == Intrinsic::lifetime_end) {
       ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
-      uint64_t Size = std::min(AllocSize - Offset, Length->getLimitedValue());
+      uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
+                               Length->getLimitedValue());
       insertUse(II, Offset, Size, true);
-      return true;
+      return;
     }
 
-    return markAsEscaping(II);
+    Base::visitIntrinsicInst(II);
   }
 
   Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
@@ -757,14 +656,14 @@ private:
       llvm::tie(UsedI, I) = Uses.pop_back_val();
 
       if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-        Size = std::max(Size, TD.getTypeStoreSize(LI->getType()));
+        Size = std::max(Size, DL.getTypeStoreSize(LI->getType()));
         continue;
       }
       if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
         Value *Op = SI->getOperand(0);
         if (Op == UsedI)
           return SI;
-        Size = std::max(Size, TD.getTypeStoreSize(Op->getType()));
+        Size = std::max(Size, DL.getTypeStoreSize(Op->getType()));
         continue;
       }
 
@@ -785,54 +684,62 @@ private:
     return 0;
   }
 
-  bool visitPHINode(PHINode &PN) {
+  void visitPHINode(PHINode &PN) {
+    if (PN.use_empty())
+      return;
+    if (!IsOffsetKnown)
+      return PI.setAborted(&PN);
+
     // See if we already have computed info on this node.
     std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN];
     if (PHIInfo.first) {
       PHIInfo.second = true;
       insertUse(PN, Offset, PHIInfo.first);
-      return true;
+      return;
     }
 
     // Check for an unsafe use of the PHI node.
-    if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first))
-      return markAsEscaping(*EscapingI);
+    if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first))
+      return PI.setAborted(UnsafeI);
 
     insertUse(PN, Offset, PHIInfo.first);
-    return true;
   }
 
-  bool visitSelectInst(SelectInst &SI) {
+  void visitSelectInst(SelectInst &SI) {
+    if (SI.use_empty())
+      return;
     if (Value *Result = foldSelectInst(SI)) {
       if (Result == *U)
         // If the result of the constant fold will be the pointer, recurse
         // through the select as if we had RAUW'ed it.
-        enqueueUsers(SI, Offset);
+        enqueueUsers(SI);
 
-      return true;
+      return;
     }
+    if (!IsOffsetKnown)
+      return PI.setAborted(&SI);
 
     // See if we already have computed info on this node.
     std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI];
     if (SelectInfo.first) {
       SelectInfo.second = true;
       insertUse(SI, Offset, SelectInfo.first);
-      return true;
+      return;
     }
 
     // Check for an unsafe use of the PHI node.
-    if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first))
-      return markAsEscaping(*EscapingI);
+    if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first))
+      return PI.setAborted(UnsafeI);
 
     insertUse(SI, Offset, SelectInfo.first);
-    return true;
   }
 
   /// \brief Disable SROA entirely if there are unhandled users of the alloca.
-  bool visitInstruction(Instruction &I) { return markAsEscaping(I); }
+  void visitInstruction(Instruction &I) {
+    PI.setAborted(&I);
+  }
 };
 
-
 /// \brief Use adder for the alloca partitioning.
 ///
 /// This class adds the uses of an alloca to all of the partitions which they
@@ -851,26 +758,22 @@ private:
 /// partition space is pre-sorted, and do a logarithmic search for the
 /// partition needed, making the total visit a classical ((N + M) * log(N))
 /// complexity operation.
-class AllocaPartitioning::UseBuilder : public BuilderBase<UseBuilder> {
+class AllocaPartitioning::UseBuilder : public PtrUseVisitor<UseBuilder> {
+  friend class PtrUseVisitor<UseBuilder>;
   friend class InstVisitor<UseBuilder>;
+  typedef PtrUseVisitor<UseBuilder> Base;
+
+  const uint64_t AllocSize;
+  AllocaPartitioning &P;
 
   /// \brief Set to de-duplicate dead instructions found in the use walk.
   SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
 
 public:
   UseBuilder(const DataLayout &TD, AllocaInst &AI, AllocaPartitioning &P)
-      : BuilderBase<UseBuilder>(TD, AI, P) {}
-
-  /// \brief Run the builder over the allocation.
-  void operator()() {
-    // Note that we have to re-evaluate size on each trip through the loop as
-    // the queue grows at the tail.
-    for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) {
-      U = Queue[Idx].U;
-      Offset = Queue[Idx].Offset;
-      this->visit(cast<Instruction>(U->getUser()));
-    }
-  }
+      : PtrUseVisitor<UseBuilder>(TD),
+        AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())),
+        P(P) {}
 
 private:
   void markAsDead(Instruction &I) {
@@ -878,20 +781,14 @@ private:
       P.DeadUsers.push_back(&I);
   }
 
-  void insertUse(Instruction &User, int64_t Offset, uint64_t Size) {
+  void insertUse(Instruction &User, const APInt &Offset, uint64_t Size) {
     // If the use has a zero size or extends outside of the allocation, record
     // it as a dead use for elimination later.
-    if (Size == 0 || (uint64_t)Offset >= AllocSize ||
-        (Offset < 0 && (uint64_t)-Offset >= Size))
+    if (Size == 0 || Offset.isNegative() || Offset.uge(AllocSize))
       return markAsDead(User);
 
-    // Clamp the start to the beginning of the allocation.
-    if (Offset < 0) {
-      Size -= (uint64_t)-Offset;
-      Offset = 0;
-    }
-
-    uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size;
+    uint64_t BeginOffset = Offset.getZExtValue();
+    uint64_t EndOffset = BeginOffset + Size;
 
     // Clamp the end offset to the end of the allocation. Note that this is
     // formulated to handle even the case where "BeginOffset + Size" overflows.
@@ -914,15 +811,15 @@ private:
     }
   }
 
-  void handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) {
-    uint64_t Size = TD.getTypeStoreSize(Ty);
+  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset) {
+    uint64_t Size = DL.getTypeStoreSize(Ty);
 
     // If this memory access can be shown to *statically* extend outside the
     // bounds of of the allocation, it's behavior is undefined, so simply
     // ignore it. Note that this is more strict than the generic clamping
     // behavior of insertUse.
-    if (Offset < 0 || (uint64_t)Offset >= AllocSize ||
-        Size > (AllocSize - (uint64_t)Offset))
+    if (Offset.isNegative() || Size > AllocSize ||
+        Offset.ugt(AllocSize - Size))
       return markAsDead(I);
 
     insertUse(I, Offset, Size);
@@ -932,40 +829,47 @@ private:
     if (BC.use_empty())
       return markAsDead(BC);
 
-    enqueueUsers(BC, Offset);
+    return Base::visitBitCastInst(BC);
   }
 
   void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
     if (GEPI.use_empty())
       return markAsDead(GEPI);
 
-    int64_t GEPOffset;
-    if (!computeConstantGEPOffset(GEPI, GEPOffset))
-      llvm_unreachable("Unable to compute constant offset for use");
-
-    enqueueUsers(GEPI, GEPOffset);
+    return Base::visitGetElementPtrInst(GEPI);
   }
 
   void visitLoadInst(LoadInst &LI) {
+    assert(IsOffsetKnown);
     handleLoadOrStore(LI.getType(), LI, Offset);
   }
 
   void visitStoreInst(StoreInst &SI) {
+    assert(IsOffsetKnown);
     handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset);
   }
 
   void visitMemSetInst(MemSetInst &II) {
     ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
-    uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
-    insertUse(II, Offset, Size);
+    if ((Length && Length->getValue() == 0) ||
+        (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
+      return markAsDead(II);
+
+    assert(IsOffsetKnown);
+    insertUse(II, Offset, Length ? Length->getLimitedValue()
+                                 : AllocSize - Offset.getLimitedValue());
   }
 
   void visitMemTransferInst(MemTransferInst &II) {
     ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
-    uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset;
-    if (!Size)
+    if ((Length && Length->getValue() == 0) ||
+        (IsOffsetKnown && !Offset.isNegative() && Offset.uge(AllocSize)))
       return markAsDead(II);
 
+    assert(IsOffsetKnown);
+    uint64_t Size = Length ? Length->getLimitedValue()
+                           : AllocSize - Offset.getLimitedValue();
+
     MemTransferOffsets &Offsets = P.MemTransferInstData[&II];
     if (!II.isVolatile() && Offsets.DestEnd && Offsets.SourceEnd &&
         Offsets.DestBegin == Offsets.SourceBegin)
@@ -975,34 +879,39 @@ private:
   }
 
   void visitIntrinsicInst(IntrinsicInst &II) {
+    assert(IsOffsetKnown);
     assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
            II.getIntrinsicID() == Intrinsic::lifetime_end);
 
     ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
-    insertUse(II, Offset,
-              std::min(AllocSize - Offset, Length->getLimitedValue()));
+    insertUse(II, Offset, std::min(Length->getLimitedValue(),
+                                   AllocSize - Offset.getLimitedValue()));
   }
 
-  void insertPHIOrSelect(Instruction &User, uint64_t Offset) {
+  void insertPHIOrSelect(Instruction &User, const APInt &Offset) {
     uint64_t Size = P.PHIOrSelectSizes.lookup(&User).first;
 
     // For PHI and select operands outside the alloca, we can't nuke the entire
     // phi or select -- the other side might still be relevant, so we special
     // case them here and use a separate structure to track the operands
     // themselves which should be replaced with undef.
-    if (Offset >= AllocSize) {
+    if ((Offset.isNegative() && Offset.uge(Size)) ||
+        (!Offset.isNegative() && Offset.uge(AllocSize))) {
       P.DeadOperands.push_back(U);
       return;
     }
 
     insertUse(User, Offset, Size);
   }
+
   void visitPHINode(PHINode &PN) {
     if (PN.use_empty())
       return markAsDead(PN);
 
+    assert(IsOffsetKnown);
     insertPHIOrSelect(PN, Offset);
   }
+
   void visitSelectInst(SelectInst &SI) {
     if (SI.use_empty())
       return markAsDead(SI);
@@ -1011,7 +920,7 @@ private:
       if (Result == *U)
         // If the result of the constant fold will be the pointer, recurse
         // through the select as if we had RAUW'ed it.
-        enqueueUsers(SI, Offset);
+        enqueueUsers(SI);
       else
         // Otherwise the operand to the select is dead, and we can replace it
         // with undef.
@@ -1020,6 +929,7 @@ private:
       return;
     }
 
+    assert(IsOffsetKnown);
     insertPHIOrSelect(SI, Offset);
   }
 
@@ -1131,8 +1041,15 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
 #endif
       PointerEscapingInstr(0) {
   PartitionBuilder PB(TD, AI, *this);
-  if (!PB())
+  PartitionBuilder::PtrInfo PtrI = PB.visitPtr(AI);
+  if (PtrI.isEscaped() || PtrI.isAborted()) {
+    // FIXME: We should sink the escape vs. abort info into the caller nicely,
+    // possibly by just storing the PtrInfo in the AllocaPartitioning.
+    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
+                                                  : PtrI.getAbortingInst();
+    assert(PointerEscapingInstr && "Did not track a bad instruction");
     return;
+  }
 
   // Sort the uses. This arranges for the offsets to be in ascending order,
   // and the sizes to be in descending order.
@@ -1166,7 +1083,9 @@ AllocaPartitioning::AllocaPartitioning(const DataLayout &TD, AllocaInst &AI)
   // re-walking the recursive users of the alloca.
   Uses.resize(Partitions.size());
   UseBuilder UB(TD, AI, *this);
-  UB();
+  PtrI = UB.visitPtr(AI);
+  assert(!PtrI.isEscaped() && "Previously analyzed pointer now escapes!");
+  assert(!PtrI.isAborted() && "Early aborted the visit of the pointer.");
 }
 
 Type *AllocaPartitioning::getCommonType(iterator I) const {
@@ -2164,6 +2083,9 @@ static bool isIntegerWideningViable(const DataLayout &TD,
                                     AllocaPartitioning::const_use_iterator I,
                                     AllocaPartitioning::const_use_iterator E) {
   uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy);
+  // Don't create integer types larger than the maximum bitwidth.
+  if (SizeInBits > IntegerType::MAX_INT_BITS)
+    return false;
 
   // Don't try to handle allocas with bit-padding.
   if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
@@ -2202,7 +2124,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
       if (RelBegin == 0 && RelEnd == Size)
         WholeAllocaOp = true;
       if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
-        if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy))
+        if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy))
           return false;
         continue;
       }
@@ -2218,7 +2140,7 @@ static bool isIntegerWideningViable(const DataLayout &TD,
       if (RelBegin == 0 && RelEnd == Size)
         WholeAllocaOp = true;
       if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
-        if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy))
+        if (ITy->getBitWidth() < TD.getTypeStoreSizeInBits(ITy))
           return false;
         continue;
       }
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 39630fd027..762bb15c59 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -13,14 +13,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm-c/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm-c/Initialization.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm-c/Transforms/Scalar.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
 
 using namespace llvm;
 
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index a46d09c320..c8656fbd8e 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -21,8 +21,15 @@
 
 #define DEBUG_TYPE "scalarrepl"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
 #include "llvm/DIBuilder.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
@@ -34,19 +41,12 @@
 #include "llvm/Module.h"
 #include "llvm/Operator.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 9f24bb635e..9160f04fe2 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -23,19 +23,19 @@
 
 #define DEBUG_TYPE "simplifycfg"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Attributes.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
-#include "llvm/Attributes.h"
-#include "llvm/Support/CFG.h"
 #include "llvm/Pass.h"
-#include "llvm/DataLayout.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
 #include "llvm/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
 STATISTIC(NumSimpl, "Number of blocks simplified");
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 0788f19014..d4643b9d80 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -17,25 +17,24 @@
 
 #define DEBUG_TYPE "simplify-libcalls"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/config.h"            // FIXME: Shouldn't depend on host!
+#include "llvm/DataLayout.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Config/config.h"            // FIXME: Shouldn't depend on host!
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
 using namespace llvm;
 
-STATISTIC(NumSimplified, "Number of library calls simplified");
 STATISTIC(NumAnnotated, "Number of attributes added to library functions");
 
 //===----------------------------------------------------------------------===//
@@ -82,289 +81,6 @@ public:
 
 
 //===----------------------------------------------------------------------===//
-// Helper Functions
-//===----------------------------------------------------------------------===//
-
-static bool CallHasFloatingPointArgument(const CallInst *CI) {
-  for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end();
-       it != e; ++it) {
-    if ((*it)->getType()->isFloatingPointTy())
-      return true;
-  }
-  return false;
-}
-
-namespace {
-//===----------------------------------------------------------------------===//
-// Formatting and IO Optimizations
-//===----------------------------------------------------------------------===//
-
-//===---------------------------------------===//
-// 'sprintf' Optimizations
-
-struct SPrintFOpt : public LibCallOptimization {
-  Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
-                                   IRBuilder<> &B) {
-    // Check for a fixed format string.
-    StringRef FormatStr;
-    if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
-      return 0;
-
-    // If we just have a format string (nothing else crazy) transform it.
-    if (CI->getNumArgOperands() == 2) {
-      // Make sure there's no % in the constant array.  We could try to handle
-      // %% -> % in the future if we cared.
-      for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
-        if (FormatStr[i] == '%')
-          return 0; // we found a format specifier, bail out.
-
-      // These optimizations require DataLayout.
-      if (!TD) return 0;
-
-      // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
-      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
-                     ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
-                                      FormatStr.size() + 1), 1);   // nul byte.
-      return ConstantInt::get(CI->getType(), FormatStr.size());
-    }
-
-    // The remaining optimizations require the format string to be "%s" or "%c"
-    // and have an extra operand.
-    if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
-        CI->getNumArgOperands() < 3)
-      return 0;
-
-    // Decode the second character of the format string.
-    if (FormatStr[1] == 'c') {
-      // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
-      if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
-      Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
-      Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
-      B.CreateStore(V, Ptr);
-      Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
-      B.CreateStore(B.getInt8(0), Ptr);
-
-      return ConstantInt::get(CI->getType(), 1);
-    }
-
-    if (FormatStr[1] == 's') {
-      // These optimizations require DataLayout.
-      if (!TD) return 0;
-
-      // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
-      if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
-
-      Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI);
-      if (!Len)
-        return 0;
-      Value *IncLen = B.CreateAdd(Len,
-                                  ConstantInt::get(Len->getType(), 1),
-                                  "leninc");
-      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
-
-      // The sprintf result is the unincremented number of bytes in the string.
-      return B.CreateIntCast(Len, CI->getType(), false);
-    }
-    return 0;
-  }
-
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Require two fixed pointer arguments and an integer result.
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
-        !FT->getParamType(1)->isPointerTy() ||
-        !FT->getReturnType()->isIntegerTy())
-      return 0;
-
-    if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
-      return V;
-    }
-
-    // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
-    // point arguments.
-    if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) {
-      Module *M = B.GetInsertBlock()->getParent()->getParent();
-      Constant *SIPrintFFn =
-        M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
-      CallInst *New = cast<CallInst>(CI->clone());
-      New->setCalledFunction(SIPrintFFn);
-      B.Insert(New);
-      return New;
-    }
-    return 0;
-  }
-};
-
-//===---------------------------------------===//
-// 'fwrite' Optimizations
-
-struct FWriteOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Require a pointer, an integer, an integer, a pointer, returning integer.
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
-        !FT->getParamType(1)->isIntegerTy() ||
-        !FT->getParamType(2)->isIntegerTy() ||
-        !FT->getParamType(3)->isPointerTy() ||
-        !FT->getReturnType()->isIntegerTy())
-      return 0;
-
-    // Get the element size and count.
-    ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
-    ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
-    if (!SizeC || !CountC) return 0;
-    uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
-
-    // If this is writing zero records, remove the call (it's a noop).
-    if (Bytes == 0)
-      return ConstantInt::get(CI->getType(), 0);
-
-    // If this is writing one byte, turn it into fputc.
-    // This optimisation is only valid, if the return value is unused.
-    if (Bytes == 1 && CI->use_empty()) {  // fwrite(S,1,1,F) -> fputc(S[0],F)
-      Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
-      Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
-      return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
-    }
-
-    return 0;
-  }
-};
-
-//===---------------------------------------===//
-// 'fputs' Optimizations
-
-struct FPutsOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // These optimizations require DataLayout.
-    if (!TD) return 0;
-
-    // Require two pointers.  Also, we can't optimize if return value is used.
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
-        !FT->getParamType(1)->isPointerTy() ||
-        !CI->use_empty())
-      return 0;
-
-    // fputs(s,F) --> fwrite(s,1,strlen(s),F)
-    uint64_t Len = GetStringLength(CI->getArgOperand(0));
-    if (!Len) return 0;
-    // Known to have no uses (see above).
-    return EmitFWrite(CI->getArgOperand(0),
-                      ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
-                      CI->getArgOperand(1), B, TD, TLI);
-  }
-};
-
-//===---------------------------------------===//
-// 'fprintf' Optimizations
-
-struct FPrintFOpt : public LibCallOptimization {
-  Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
-                                   IRBuilder<> &B) {
-    // All the optimizations depend on the format string.
-    StringRef FormatStr;
-    if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
-      return 0;
-
-    // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
-    if (CI->getNumArgOperands() == 2) {
-      for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
-        if (FormatStr[i] == '%')  // Could handle %% -> % if we cared.
-          return 0; // We found a format specifier.
-
-      // These optimizations require DataLayout.
-      if (!TD) return 0;
-
-      Value *NewCI = EmitFWrite(CI->getArgOperand(1),
-                                ConstantInt::get(TD->getIntPtrType(*Context),
-                                                 FormatStr.size()),
-                                CI->getArgOperand(0), B, TD, TLI);
-      return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
-    }
-
-    // The remaining optimizations require the format string to be "%s" or "%c"
-    // and have an extra operand.
-    if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
-        CI->getNumArgOperands() < 3)
-      return 0;
-
-    // Decode the second character of the format string.
-    if (FormatStr[1] == 'c') {
-      // fprintf(F, "%c", chr) --> fputc(chr, F)
-      if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
-      Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
-                               TD, TLI);
-      return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
-    }
-
-    if (FormatStr[1] == 's') {
-      // fprintf(F, "%s", str) --> fputs(str, F)
-      if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
-        return 0;
-      return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
-    }
-    return 0;
-  }
-
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Require two fixed paramters as pointers and integer result.
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
-        !FT->getParamType(1)->isPointerTy() ||
-        !FT->getReturnType()->isIntegerTy())
-      return 0;
-
-    if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
-      return V;
-    }
-
-    // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
-    // floating point arguments.
-    if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) {
-      Module *M = B.GetInsertBlock()->getParent()->getParent();
-      Constant *FIPrintFFn =
-        M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
-      CallInst *New = cast<CallInst>(CI->clone());
-      New->setCalledFunction(FIPrintFFn);
-      B.Insert(New);
-      return New;
-    }
-    return 0;
-  }
-};
-
-//===---------------------------------------===//
-// 'puts' Optimizations
-
-struct PutsOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Require one fixed pointer argument and an integer/void result.
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
-        !(FT->getReturnType()->isIntegerTy() ||
-          FT->getReturnType()->isVoidTy()))
-      return 0;
-
-    // Check for a constant string.
-    StringRef Str;
-    if (!getConstantStringInfo(CI->getArgOperand(0), Str))
-      return 0;
-
-    if (Str.empty() && CI->use_empty()) {
-      // puts("") -> putchar('\n')
-      Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
-      if (CI->use_empty() || !Res) return Res;
-      return B.CreateIntCast(Res, CI->getType(), true);
-    }
-
-    return 0;
-  }
-};
-
-} // end anonymous namespace.
-
-//===----------------------------------------------------------------------===//
 // SimplifyLibCalls Pass Implementation
 //===----------------------------------------------------------------------===//
 
@@ -375,10 +91,6 @@ namespace {
     TargetLibraryInfo *TLI;
 
     StringMap<LibCallOptimization*> Optimizations;
-    // Formatting and IO Optimizations
-    SPrintFOpt SPrintF;
-    FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF;
-    PutsOpt Puts;
 
     bool Modified;  // This is only used by doInitialization.
   public:
@@ -433,12 +145,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
 /// Optimizations - Populate the Optimizations map with all the optimizations
 /// we know.
 void SimplifyLibCalls::InitOptimizations() {
-  // Formatting and IO Optimizations
-  Optimizations["sprintf"] = &SPrintF;
-  AddOpt(LibFunc::fwrite, &FWrite);
-  AddOpt(LibFunc::fputs, &FPuts);
-  Optimizations["fprintf"] = &FPrintF;
-  Optimizations["puts"] = &Puts;
 }
 
 
@@ -486,7 +192,6 @@ bool SimplifyLibCalls::runOnFunction(Function &F) {
 
       // Something changed!
       Changed = true;
-      ++NumSimplified;
 
       // Inspect the instruction after the call (which was potentially just
       // added) next.
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 34f1d6c622..cde9c178ad 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -14,13 +14,13 @@
 
 #define DEBUG_TYPE "sink"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Assembly/Writer.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 6557d630a9..e357378524 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -52,8 +52,12 @@
 
 #define DEBUG_TYPE "tailcallelim"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
@@ -61,16 +65,12 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/Loads.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
 STATISTIC(NumEliminated, "Number of tail calls removed");
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 6815e411b4..3a19b706ea 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -12,16 +12,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/AddrModeMatcher.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/Instruction.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/PatternMatch.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/CallSite.h"
 
 using namespace llvm;
 using namespace llvm::PatternMatch;
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 9fea11391a..e8833f2092 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -13,20 +13,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Constant.h"
-#include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Constant.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ValueHandle.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Type.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 6b04e3d17b..385ceb13b2 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -17,17 +17,17 @@
 
 #define DEBUG_TYPE "break-crit-edges"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
-#include "llvm/Type.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 STATISTIC(NumBroken, "Number of blocks inserted");
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 74b2ee10e0..62b79bf2b3 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -12,7 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Function.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Intrinsics.h"
@@ -20,10 +22,8 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Type.h"
 
 using namespace llvm;
 
@@ -43,12 +43,12 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
   AttributeWithIndex AWI[2];
   AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
   Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+  AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                                    ArrayRef<Attributes::AttrVal>(AVs, 2));
 
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Constant *StrLen = M->getOrInsertFunction("strlen",
-                                            AttrListPtr::get(M->getContext(),
+                                            AttributeSet::get(M->getContext(),
                                                              AWI),
                                             TD->getIntPtrType(Context),
                                             B.getInt8PtrTy(),
@@ -72,12 +72,12 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
   AttributeWithIndex AWI[2];
   AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
   Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+  AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                                    ArrayRef<Attributes::AttrVal>(AVs, 2));
 
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Constant *StrNLen = M->getOrInsertFunction("strnlen",
-                                             AttrListPtr::get(M->getContext(),
+                                             AttributeSet::get(M->getContext(),
                                                               AWI),
                                              TD->getIntPtrType(Context),
                                              B.getInt8PtrTy(),
@@ -101,13 +101,13 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
   AttributeWithIndex AWI =
-    AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+    AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                             ArrayRef<Attributes::AttrVal>(AVs, 2));
 
   Type *I8Ptr = B.getInt8PtrTy();
   Type *I32Ty = B.getInt32Ty();
   Constant *StrChr = M->getOrInsertFunction("strchr",
-                                            AttrListPtr::get(M->getContext(),
+                                            AttributeSet::get(M->getContext(),
                                                              AWI),
                                             I8Ptr, I8Ptr, I32Ty, NULL);
   CallInst *CI = B.CreateCall2(StrChr, CastToCStr(Ptr, B),
@@ -129,12 +129,12 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
   AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
   AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
   Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+  AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                                    ArrayRef<Attributes::AttrVal>(AVs, 2));
 
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Value *StrNCmp = M->getOrInsertFunction("strncmp",
-                                          AttrListPtr::get(M->getContext(),
+                                          AttributeSet::get(M->getContext(),
                                                            AWI),
                                           B.getInt32Ty(),
                                           B.getInt8PtrTy(),
@@ -160,11 +160,11 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
   AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+  AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                                    Attributes::NoUnwind);
   Type *I8Ptr = B.getInt8PtrTy();
   Value *StrCpy = M->getOrInsertFunction(Name,
-                                         AttrListPtr::get(M->getContext(), AWI),
+                                         AttributeSet::get(M->getContext(), AWI),
                                          I8Ptr, I8Ptr, I8Ptr, NULL);
   CallInst *CI = B.CreateCall2(StrCpy, CastToCStr(Dst, B), CastToCStr(Src, B),
                                Name);
@@ -184,11 +184,11 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
   AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+  AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                                    Attributes::NoUnwind);
   Type *I8Ptr = B.getInt8PtrTy();
   Value *StrNCpy = M->getOrInsertFunction(Name,
-                                          AttrListPtr::get(M->getContext(),
+                                          AttributeSet::get(M->getContext(),
                                                            AWI),
                                           I8Ptr, I8Ptr, I8Ptr,
                                           Len->getType(), NULL);
@@ -210,11 +210,11 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI;
-  AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+  AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                                 Attributes::NoUnwind);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
-                                         AttrListPtr::get(M->getContext(), AWI),
+                                         AttributeSet::get(M->getContext(), AWI),
                                          B.getInt8PtrTy(),
                                          B.getInt8PtrTy(),
                                          B.getInt8PtrTy(),
@@ -239,11 +239,11 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI;
   Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+  AWI = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                                 ArrayRef<Attributes::AttrVal>(AVs, 2));
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Value *MemChr = M->getOrInsertFunction("memchr",
-                                         AttrListPtr::get(M->getContext(), AWI),
+                                         AttributeSet::get(M->getContext(), AWI),
                                          B.getInt8PtrTy(),
                                          B.getInt8PtrTy(),
                                          B.getInt32Ty(),
@@ -269,12 +269,12 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
   AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
   AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
   Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+  AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                                    ArrayRef<Attributes::AttrVal>(AVs, 2));
 
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Value *MemCmp = M->getOrInsertFunction("memcmp",
-                                         AttrListPtr::get(M->getContext(), AWI),
+                                         AttributeSet::get(M->getContext(), AWI),
                                          B.getInt32Ty(),
                                          B.getInt8PtrTy(),
                                          B.getInt8PtrTy(),
@@ -293,7 +293,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
 /// returns one value with the same type.  If 'Op' is a long double, 'l' is
 /// added as the suffix of name, if 'Op' is a float, we add a 'f' suffix.
 Value *llvm::EmitUnaryFloatFnCall(Value *Op, StringRef Name, IRBuilder<> &B,
-                                  const AttrListPtr &Attrs) {
+                                  const AttributeSet &Attrs) {
   SmallString<20> NameBuffer;
   if (!Op->getType()->isDoubleTy()) {
     // If we need to add a suffix, copy into NameBuffer.
@@ -348,11 +348,11 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
   AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+  AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                                    Attributes::NoUnwind);
 
   Value *PutS = M->getOrInsertFunction("puts",
-                                       AttrListPtr::get(M->getContext(), AWI),
+                                       AttributeSet::get(M->getContext(), AWI),
                                        B.getInt32Ty(),
                                        B.getInt8PtrTy(),
                                        NULL);
@@ -372,12 +372,12 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
   AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+  AWI[1] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                                    Attributes::NoUnwind);
   Constant *F;
   if (File->getType()->isPointerTy())
     F = M->getOrInsertFunction("fputc",
-                               AttrListPtr::get(M->getContext(), AWI),
+                               AttributeSet::get(M->getContext(), AWI),
                                B.getInt32Ty(),
                                B.getInt32Ty(), File->getType(),
                                NULL);
@@ -406,13 +406,13 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
   AttributeWithIndex AWI[3];
   AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
   AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
-  AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+  AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                                    Attributes::NoUnwind);
   StringRef FPutsName = TLI->getName(LibFunc::fputs);
   Constant *F;
   if (File->getType()->isPointerTy())
     F = M->getOrInsertFunction(FPutsName,
-                               AttrListPtr::get(M->getContext(), AWI),
+                               AttributeSet::get(M->getContext(), AWI),
                                B.getInt32Ty(),
                                B.getInt8PtrTy(),
                                File->getType(), NULL);
@@ -439,14 +439,14 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
   AttributeWithIndex AWI[3];
   AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
   AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture);
-  AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+  AWI[2] = AttributeWithIndex::get(M->getContext(), AttributeSet::FunctionIndex,
                                    Attributes::NoUnwind);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   StringRef FWriteName = TLI->getName(LibFunc::fwrite);
   Constant *F;
   if (File->getType()->isPointerTy())
     F = M->getOrInsertFunction(FWriteName,
-                               AttrListPtr::get(M->getContext(), AWI),
+                               AttributeSet::get(M->getContext(), AWI),
                                TD->getIntPtrType(Context),
                                B.getInt8PtrTy(),
                                TD->getIntPtrType(Context),
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index bee2f7bcb6..1699a3b648 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -16,11 +16,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "bypass-slow-division"
-#include "llvm/Instructions.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Function.h"
 #include "llvm/IRBuilder.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/Instructions.h"
 
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 7ba9f6d9d2..12f2e4b83e 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -14,22 +14,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Constants.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/Function.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Metadata.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/ADT/SmallVector.h"
 #include <map>
 using namespace llvm;
 
@@ -99,12 +99,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
                        .getParamAttributes(I->getArgNo() + 1));
     NewFunc->setAttributes(NewFunc->getAttributes()
                            .addAttr(NewFunc->getContext(),
-                                    AttrListPtr::ReturnIndex,
+                                    AttributeSet::ReturnIndex,
                                     OldFunc->getAttributes()
                                      .getRetAttributes()));
     NewFunc->setAttributes(NewFunc->getAttributes()
                            .addAttr(NewFunc->getContext(),
-                                    AttrListPtr::FunctionIndex,
+                                    AttributeSet::FunctionIndex,
                                     OldFunc->getAttributes()
                                      .getFnAttributes()));
 
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 1dac6b5b8b..114babd101 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -13,9 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Module.h"
-#include "llvm/DerivedTypes.h"
 #include "llvm/Constant.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 using namespace llvm;
 
@@ -38,10 +38,6 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
   New->setTargetTriple(M->getTargetTriple());
   New->setModuleInlineAsm(M->getModuleInlineAsm());
    
-  // Copy all of the dependent libraries over.
-  for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
-    New->addLibrary(*I);
-
   // Loop over all of the global variables, making corresponding globals in the
   // new module.  Here we add them to the VMap and to the new Module.  We
   // don't worry about attributes or initializers, they will come later.
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 281714f4c1..a596df64fd 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -14,6 +14,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/Verifier.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
@@ -21,18 +28,11 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/RegionInfo.h"
-#include "llvm/Analysis/RegionIterator.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include <algorithm>
 #include <set>
 using namespace llvm;
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index 99b5830163..f8a0cafadc 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -8,10 +8,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/Type.h"
-#include "llvm/ADT/DenseMap.h"
 using namespace llvm;
 
 /// DemoteRegToStack - This function takes a virtual register computed by an
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 303de56d95..c176cf1075 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -13,8 +13,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Attributes.h"
 #include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/IRBuilder.h"
@@ -22,12 +27,7 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Intrinsics.h"
 #include "llvm/Module.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/DataLayout.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index 55227e2714..67dcbe446b 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -15,11 +15,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "integer-division"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
 #include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
 #include "llvm/Instructions.h"
 #include "llvm/Intrinsics.h"
-#include "llvm/IRBuilder.h"
-#include "llvm/Transforms/Utils/IntegerDivision.h"
 
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 5e05c83c35..5dddb6e28a 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -29,17 +29,17 @@
 
 #define DEBUG_TYPE "lcssa"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Pass.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 using namespace llvm;
 
 STATISTIC(NumLCSSA, "Number of live out of a loop variables");
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index a954d82c05..0e56817a1b 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -13,8 +13,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
 #include "llvm/DIBuilder.h"
+#include "llvm/DataLayout.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalAlias.h"
@@ -26,20 +34,12 @@
 #include "llvm/MDBuilder.h"
 #include "llvm/Metadata.h"
 #include "llvm/Operator.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/ValueHandle.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 9d9e201665..6a68416a3d 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -39,26 +39,26 @@
 
 #define DEBUG_TYPE "loop-simplify"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 20237500c3..d24b334681 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -18,12 +18,12 @@
 
 #define DEBUG_TYPE "loop-unroll"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/BasicBlock.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/BasicBlock.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 67e17f4ca8..242e7fa021 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -23,12 +23,12 @@
 
 #define DEBUG_TYPE "loop-unroll"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
-#include "llvm/BasicBlock.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/LoopIterator.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/BasicBlock.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index 02bdcda391..8756d26ca4 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -12,6 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
@@ -21,8 +23,6 @@
 #include "llvm/MDBuilder.h"
 #include "llvm/Metadata.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include <vector>
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 930555424d..7b89ffd401 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -36,6 +36,8 @@
 
 #define DEBUG_TYPE "lowerinvoke"
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
@@ -43,12 +45,10 @@
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <csetjmp>
 #include <set>
 using namespace llvm;
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 1547439b5c..74a457ce81 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -14,16 +14,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Constants.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index f4ca81af6d..70fbf13b97 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -14,12 +14,12 @@
 
 #define DEBUG_TYPE "mem2reg"
 #include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
-#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/Instructions.h"
 #include "llvm/Function.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/Instructions.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
 using namespace llvm;
 
 STATISTIC(NumPromoted, "Number of alloca's promoted");
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index ada8e3b11e..363e9367f3 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -13,9 +13,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/IPO.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
-#include "llvm/Transforms/IPO.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/Module.h"
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 558de9d12e..b41f433659 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -27,26 +27,26 @@
 
 #define DEBUG_TYPE "mem2reg"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
+#include "llvm/DIBuilder.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/DIBuilder.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Metadata.h"
-#include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <queue>
 using namespace llvm;
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 72d4199a2a..e1e7f4d668 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -12,12 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "ssaupdater"
-#include "llvm/Constants.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/CFG.h"
@@ -25,7 +26,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
 #include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 6c34eed13d..3cae77227c 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -13,6 +13,14 @@
 
 #define DEBUG_TYPE "simplifycfg"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
 #include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
@@ -25,15 +33,6 @@
 #include "llvm/Metadata.h"
 #include "llvm/Module.h"
 #include "llvm/Operator.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ConstantRange.h"
@@ -42,9 +41,10 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetTransformInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Type.h"
 #include <algorithm>
-#include <set>
 #include <map>
+#include <set>
 using namespace llvm;
 
 static cl::opt<unsigned>
@@ -3511,22 +3511,44 @@ bool SwitchLookupTable::WouldFitInRegister(const DataLayout *TD,
 static bool ShouldBuildLookupTable(SwitchInst *SI,
                                    uint64_t TableSize,
                                    const DataLayout *TD,
+                                   const TargetTransformInfo *TTI,
                             const SmallDenseMap<PHINode*, Type*>& ResultTypes) {
-  // The table density should be at least 40%. This is the same criterion as for
-  // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
-  // FIXME: Find the best cut-off.
   if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
     return false; // TableSize overflowed, or mul below might overflow.
-  if (SI->getNumCases() * 10 >= TableSize * 4)
-    return true;
 
-  // If each table would fit in a register, we should build it anyway.
+  bool AllTablesFitInRegister = true;
+  bool HasIllegalType = false;
   for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(),
        E = ResultTypes.end(); I != E; ++I) {
-    if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second))
-      return false;
+    Type *Ty = I->second;
+
+    // Saturate this flag to true.
+    HasIllegalType = HasIllegalType ||
+      !TTI->getScalarTargetTransformInfo()->isTypeLegal(Ty);
+
+    // Saturate this flag to false.
+    AllTablesFitInRegister = AllTablesFitInRegister &&
+      SwitchLookupTable::WouldFitInRegister(TD, TableSize, Ty);
+
+    // If both flags saturate, we're done. NOTE: This *only* works with
+    // saturating flags, and all flags have to saturate first due to the
+    // non-deterministic behavior of iterating over a dense map.
+    if (HasIllegalType && !AllTablesFitInRegister)
+      break;
   }
-  return true;
+
+  // If each table would fit in a register, we should build it anyway.
+  if (AllTablesFitInRegister)
+    return true;
+
+  // Don't build a table that doesn't fit in-register if it has illegal types.
+  if (HasIllegalType)
+    return false;
+
+  // The table density should be at least 40%. This is the same criterion as for
+  // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
+  // FIXME: Find the best cut-off.
+  return SI->getNumCases() * 10 >= TableSize * 4;
 }
 
 /// SwitchToLookupTable - If the switch is only used to initialize one or more
@@ -3607,7 +3629,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
 
   APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
   uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
-  if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes))
+  if (!ShouldBuildLookupTable(SI, TableSize, TD, TTI, ResultTypes))
     return false;
 
   // Create the BB that does the lookups.
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 110f380857..5883293a81 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -15,18 +15,18 @@
 
 #define DEBUG_TYPE "indvars"
 
-#include "llvm/Instructions.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/IVUsers.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Instructions.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/DataLayout.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
 
 using namespace llvm;
 
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index 65353dc460..8b2eeb9928 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -15,18 +15,18 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "instsimplify"
-#include "llvm/Function.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/DataLayout.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
 #include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 STATISTIC(NumSimplified, "Number of redundant instructions removed");
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 6ca1175c41..11a1ee5207 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -15,14 +15,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
-#include "llvm/DataLayout.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/DataLayout.h"
 #include "llvm/Function.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Intrinsics.h"
-#include "llvm/Module.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
 
@@ -1409,6 +1409,254 @@ struct PrintFOpt : public LibCallOptimization {
   }
 };
 
+struct SPrintFOpt : public LibCallOptimization {
+  Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
+    // Check for a fixed format string.
+    StringRef FormatStr;
+    if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+      return 0;
+
+    // If we just have a format string (nothing else crazy) transform it.
+    if (CI->getNumArgOperands() == 2) {
+      // Make sure there's no % in the constant array.  We could try to handle
+      // %% -> % in the future if we cared.
+      for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+        if (FormatStr[i] == '%')
+          return 0; // we found a format specifier, bail out.
+
+      // These optimizations require DataLayout.
+      if (!TD) return 0;
+
+      // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
+      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                     ConstantInt::get(TD->getIntPtrType(*Context), // Copy the
+                                      FormatStr.size() + 1), 1);   // nul byte.
+      return ConstantInt::get(CI->getType(), FormatStr.size());
+    }
+
+    // The remaining optimizations require the format string to be "%s" or "%c"
+    // and have an extra operand.
+    if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+        CI->getNumArgOperands() < 3)
+      return 0;
+
+    // Decode the second character of the format string.
+    if (FormatStr[1] == 'c') {
+      // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
+      if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+      Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
+      Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
+      B.CreateStore(V, Ptr);
+      Ptr = B.CreateGEP(Ptr, B.getInt32(1), "nul");
+      B.CreateStore(B.getInt8(0), Ptr);
+
+      return ConstantInt::get(CI->getType(), 1);
+    }
+
+    if (FormatStr[1] == 's') {
+      // These optimizations require DataLayout.
+      if (!TD) return 0;
+
+      // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
+      if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
+
+      Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD, TLI);
+      if (!Len)
+        return 0;
+      Value *IncLen = B.CreateAdd(Len,
+                                  ConstantInt::get(Len->getType(), 1),
+                                  "leninc");
+      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1);
+
+      // The sprintf result is the unincremented number of bytes in the string.
+      return B.CreateIntCast(Len, CI->getType(), false);
+    }
+    return 0;
+  }
+
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed pointer arguments and an integer result.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
+      return V;
+    }
+
+    // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating
+    // point arguments.
+    if (TLI->has(LibFunc::siprintf) && !callHasFloatingPointArgument(CI)) {
+      Module *M = B.GetInsertBlock()->getParent()->getParent();
+      Constant *SIPrintFFn =
+        M->getOrInsertFunction("siprintf", FT, Callee->getAttributes());
+      CallInst *New = cast<CallInst>(CI->clone());
+      New->setCalledFunction(SIPrintFFn);
+      B.Insert(New);
+      return New;
+    }
+    return 0;
+  }
+};
+
+struct FPrintFOpt : public LibCallOptimization {
+  Value *optimizeFixedFormatString(Function *Callee, CallInst *CI,
+                                   IRBuilder<> &B) {
+    // All the optimizations depend on the format string.
+    StringRef FormatStr;
+    if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
+      return 0;
+
+    // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
+    if (CI->getNumArgOperands() == 2) {
+      for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
+        if (FormatStr[i] == '%')  // Could handle %% -> % if we cared.
+          return 0; // We found a format specifier.
+
+      // These optimizations require DataLayout.
+      if (!TD) return 0;
+
+      Value *NewCI = EmitFWrite(CI->getArgOperand(1),
+                                ConstantInt::get(TD->getIntPtrType(*Context),
+                                                 FormatStr.size()),
+                                CI->getArgOperand(0), B, TD, TLI);
+      return NewCI ? ConstantInt::get(CI->getType(), FormatStr.size()) : 0;
+    }
+
+    // The remaining optimizations require the format string to be "%s" or "%c"
+    // and have an extra operand.
+    if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+        CI->getNumArgOperands() < 3)
+      return 0;
+
+    // Decode the second character of the format string.
+    if (FormatStr[1] == 'c') {
+      // fprintf(F, "%c", chr) --> fputc(chr, F)
+      if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+      Value *NewCI = EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B,
+                               TD, TLI);
+      return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+    }
+
+    if (FormatStr[1] == 's') {
+      // fprintf(F, "%s", str) --> fputs(str, F)
+      if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
+        return 0;
+      return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD, TLI);
+    }
+    return 0;
+  }
+
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require two fixed paramters as pointers and integer result.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
+      return V;
+    }
+
+    // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no
+    // floating point arguments.
+    if (TLI->has(LibFunc::fiprintf) && !callHasFloatingPointArgument(CI)) {
+      Module *M = B.GetInsertBlock()->getParent()->getParent();
+      Constant *FIPrintFFn =
+        M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes());
+      CallInst *New = cast<CallInst>(CI->clone());
+      New->setCalledFunction(FIPrintFFn);
+      B.Insert(New);
+      return New;
+    }
+    return 0;
+  }
+};
+
+struct FWriteOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require a pointer, an integer, an integer, a pointer, returning integer.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isIntegerTy() ||
+        !FT->getParamType(2)->isIntegerTy() ||
+        !FT->getParamType(3)->isPointerTy() ||
+        !FT->getReturnType()->isIntegerTy())
+      return 0;
+
+    // Get the element size and count.
+    ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+    ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+    if (!SizeC || !CountC) return 0;
+    uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
+
+    // If this is writing zero records, remove the call (it's a noop).
+    if (Bytes == 0)
+      return ConstantInt::get(CI->getType(), 0);
+
+    // If this is writing one byte, turn it into fputc.
+    // This optimisation is only valid, if the return value is unused.
+    if (Bytes == 1 && CI->use_empty()) {  // fwrite(S,1,1,F) -> fputc(S[0],F)
+      Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+      Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, TD, TLI);
+      return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+    }
+
+    return 0;
+  }
+};
+
+struct FPutsOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // These optimizations require DataLayout.
+    if (!TD) return 0;
+
+    // Require two pointers.  Also, we can't optimize if return value is used.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        !CI->use_empty())
+      return 0;
+
+    // fputs(s,F) --> fwrite(s,1,strlen(s),F)
+    uint64_t Len = GetStringLength(CI->getArgOperand(0));
+    if (!Len) return 0;
+    // Known to have no uses (see above).
+    return EmitFWrite(CI->getArgOperand(0),
+                      ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
+                      CI->getArgOperand(1), B, TD, TLI);
+  }
+};
+
+struct PutsOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Require one fixed pointer argument and an integer/void result.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+        !(FT->getReturnType()->isIntegerTy() ||
+          FT->getReturnType()->isVoidTy()))
+      return 0;
+
+    // Check for a constant string.
+    StringRef Str;
+    if (!getConstantStringInfo(CI->getArgOperand(0), Str))
+      return 0;
+
+    if (Str.empty() && CI->use_empty()) {
+      // puts("") -> putchar('\n')
+      Value *Res = EmitPutChar(B.getInt32('\n'), B, TD, TLI);
+      if (CI->use_empty() || !Res) return Res;
+      return B.CreateIntCast(Res, CI->getType(), true);
+    }
+
+    return 0;
+  }
+};
+
 } // End anonymous namespace.
 
 namespace llvm {
@@ -1464,6 +1712,11 @@ class LibCallSimplifierImpl {
 
   // Formatting and IO library call optimizations.
   PrintFOpt PrintF;
+  SPrintFOpt SPrintF;
+  FPrintFOpt FPrintF;
+  FWriteOpt FWrite;
+  FPutsOpt FPuts;
+  PutsOpt Puts;
 
   void initOptimizations();
   void addOpt(LibFunc::Func F, LibCallOptimization* Opt);
@@ -1588,6 +1841,11 @@ void LibCallSimplifierImpl::initOptimizations() {
 
   // Formatting and IO library call optimizations.
   addOpt(LibFunc::printf, &PrintF);
+  addOpt(LibFunc::sprintf, &SPrintF);
+  addOpt(LibFunc::fprintf, &FPrintF);
+  addOpt(LibFunc::fwrite, &FWrite);
+  addOpt(LibFunc::fputs, &FPuts);
+  addOpt(LibFunc::puts, &Puts);
 }
 
 Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index b1cad06dff..8cf62196cc 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -15,12 +15,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
+#include "llvm/Transforms/Scalar.h"
 #include "llvm/Type.h"
-#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 
 char UnifyFunctionExitNodes::ID = 0;
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index dacbc7f242..a48229132b 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -16,22 +16,13 @@
 
 #define BBV_NAME "bb-vectorize"
 #define DEBUG_TYPE BBV_NAME
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
+#include "llvm/Transforms/Vectorize.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AliasSetTracker.h"
@@ -39,14 +30,23 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/ValueHandle.h"
-#include "llvm/DataLayout.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetTransformInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Type.h"
 #include <algorithm>
 #include <map>
 using namespace llvm;
@@ -2903,6 +2903,7 @@ namespace {
         K->mutateType(getVecTypeForPair(L->getType(), H->getType()));
 
       combineMetadata(K, H);
+      K->intersectOptionalDataWith(H);
 
       for (unsigned o = 0; o < NumOperands; ++o)
         K->setOperand(o, ReplacedOperands[o]);
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 55733f7f8a..feeececedb 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6,432 +6,50 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
-// and generates target-independent LLVM-IR. Legalization of the IR is done
-// in the codegen. However, the vectorizes uses (will use) the codegen
-// interfaces to generate IR that is likely to result in an optimal binary.
-//
-// The loop vectorizer combines consecutive loop iteration into a single
-// 'wide' iteration. After this transformation the index is incremented
-// by the SIMD vector width, and not by one.
-//
-// This pass has three parts:
-// 1. The main loop pass that drives the different parts.
-// 2. LoopVectorizationLegality - A unit that checks for the legality
-//    of the vectorization.
-// 3. SingleBlockLoopVectorizer - A unit that performs the actual
-//    widening of instructions.
-// 4. LoopVectorizationCostModel - A unit that checks for the profitability
-//    of vectorization. It decides on the optimal vector width, which
-//    can be one, if vectorization is not profitable.
-//
-//===----------------------------------------------------------------------===//
-//
-// The reduction-variable vectorization is based on the paper:
-//  D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
-//
-// Variable uniformity checks are inspired by:
-// Karrenberg, R. and Hack, S. Whole Function Vectorization.
-//
-// Other ideas/concepts are from:
-//  A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
-//
-//  S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua.  An Evaluation of
-//  Vectorizing Compilers.
-//
-//===----------------------------------------------------------------------===//
-#define LV_NAME "loop-vectorize"
-#define DEBUG_TYPE LV_NAME
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Pass.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Value.h"
-#include "llvm/Function.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/SmallVector.h"
+#include "LoopVectorize.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AliasSetTracker.h"
-#include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/TargetTransformInfo.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/DataLayout.h"
+#include "llvm/TargetTransformInfo.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
-#include <algorithm>
-using namespace llvm;
+#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Type.h"
+#include "llvm/Value.h"
 
 static cl::opt<unsigned>
 VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
-          cl::desc("Set the default vectorization width. Zero is autoselect."));
-
-/// We don't vectorize loops with a known constant trip count below this number.
-const unsigned TinyTripCountThreshold = 16;
+                    cl::desc("Sets the SIMD width. Zero is autoselect."));
 
-/// When performing a runtime memory check, do not check more than this
-/// number of pointers. Notice that the check is quadratic!
-const unsigned RuntimeMemoryCheckThreshold = 2;
-
-/// This is the highest vector width that we try to generate.
-const unsigned MaxVectorSize = 8;
+static cl::opt<bool>
+EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
+                   cl::desc("Enable if-conversion during vectorization."));
 
 namespace {
 
-// Forward declarations.
-class LoopVectorizationLegality;
-class LoopVectorizationCostModel;
-
-/// SingleBlockLoopVectorizer vectorizes loops which contain only one basic
-/// block to a specified vectorization factor (VF).
-/// This class performs the widening of scalars into vectors, or multiple
-/// scalars. This class also implements the following features:
-/// * It inserts an epilogue loop for handling loops that don't have iteration
-///   counts that are known to be a multiple of the vectorization factor.
-/// * It handles the code generation for reduction variables.
-/// * Scalarization (implementation using scalars) of un-vectorizable
-///   instructions.
-/// SingleBlockLoopVectorizer does not perform any vectorization-legality
-/// checks, and relies on the caller to check for the different legality
-/// aspects. The SingleBlockLoopVectorizer relies on the
-/// LoopVectorizationLegality class to provide information about the induction
-/// and reduction variables that were found to a given vectorization factor.
-class SingleBlockLoopVectorizer {
-public:
-  /// Ctor.
-  SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li,
-                            DominatorTree *Dt, DataLayout *Dl,
-                            LPPassManager *Lpm,
-                            unsigned VecWidth):
-  OrigLoop(Orig), SE(Se), LI(Li), DT(Dt), DL(Dl), LPM(Lpm), VF(VecWidth),
-  Builder(Se->getContext()), Induction(0), OldInduction(0) { }
-
-  // Perform the actual loop widening (vectorization).
-  void vectorize(LoopVectorizationLegality *Legal) {
-    // Create a new empty loop. Unlink the old loop and connect the new one.
-    createEmptyLoop(Legal);
-    // Widen each instruction in the old loop to a new one in the new loop.
-    // Use the Legality module to find the induction and reduction variables.
-    vectorizeLoop(Legal);
-    // Register the new loop and update the analysis passes.
-    updateAnalysis();
- }
-
-private:
-  /// Add code that checks at runtime if the accessed arrays overlap.
-  /// Returns the comperator value or NULL if no check is needed.
-  Value *addRuntimeCheck(LoopVectorizationLegality *Legal,
-                         Instruction *Loc);
-  /// Create an empty loop, based on the loop ranges of the old loop.
-  void createEmptyLoop(LoopVectorizationLegality *Legal);
-  /// Copy and widen the instructions from the old loop.
-  void vectorizeLoop(LoopVectorizationLegality *Legal);
-  /// Insert the new loop to the loop hierarchy and pass manager
-  /// and update the analysis passes.
-  void updateAnalysis();
-
-  /// This instruction is un-vectorizable. Implement it as a sequence
-  /// of scalars.
-  void scalarizeInstruction(Instruction *Instr);
-
-  /// Create a broadcast instruction. This method generates a broadcast
-  /// instruction (shuffle) for loop invariant values and for the induction
-  /// value. If this is the induction variable then we extend it to N, N+1, ...
-  /// this is needed because each iteration in the loop corresponds to a SIMD
-  /// element.
-  Value *getBroadcastInstrs(Value *V);
-
-  /// This is a helper function used by getBroadcastInstrs. It adds 0, 1, 2 ..
-  /// for each element in the vector. Starting from zero.
-  Value *getConsecutiveVector(Value* Val);
-
-  /// When we go over instructions in the basic block we rely on previous
-  /// values within the current basic block or on loop invariant values.
-  /// When we widen (vectorize) values we place them in the map. If the values
-  /// are not within the map, they have to be loop invariant, so we simply
-  /// broadcast them into a vector.
-  Value *getVectorValue(Value *V);
-
-  /// Get a uniform vector of constant integers. We use this to get
-  /// vectors of ones and zeros for the reduction code.
-  Constant* getUniformVector(unsigned Val, Type* ScalarTy);
-
-  typedef DenseMap<Value*, Value*> ValueMap;
-
-  /// The original loop.
-  Loop *OrigLoop;
-  // Scev analysis to use.
-  ScalarEvolution *SE;
-  // Loop Info.
-  LoopInfo *LI;
-  // Dominator Tree.
-  DominatorTree *DT;
-  // Data Layout.
-  DataLayout *DL;
-  // Loop Pass Manager;
-  LPPassManager *LPM;
-  // The vectorization factor to use.
-  unsigned VF;
-
-  // The builder that we use
-  IRBuilder<> Builder;
-
-  // --- Vectorization state ---
-
-  /// The vector-loop preheader.
-  BasicBlock *LoopVectorPreHeader;
-  /// The scalar-loop preheader.
-  BasicBlock *LoopScalarPreHeader;
-  /// Middle Block between the vector and the scalar.
-  BasicBlock *LoopMiddleBlock;
-  ///The ExitBlock of the scalar loop.
-  BasicBlock *LoopExitBlock;
-  ///The vector loop body.
-  BasicBlock *LoopVectorBody;
-  ///The scalar loop body.
-  BasicBlock *LoopScalarBody;
-  ///The first bypass block.
-  BasicBlock *LoopBypassBlock;
-
-  /// The new Induction variable which was added to the new block.
-  PHINode *Induction;
-  /// The induction variable of the old basic block.
-  PHINode *OldInduction;
-  // Maps scalars to widened vectors.
-  ValueMap WidenMap;
-};
-
-/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
-/// to what vectorization factor.
-/// This class does not look at the profitability of vectorization, only the
-/// legality. This class has two main kinds of checks:
-/// * Memory checks - The code in canVectorizeMemory checks if vectorization
-///   will change the order of memory accesses in a way that will change the
-///   correctness of the program.
-/// * Scalars checks - The code in canVectorizeBlock checks for a number
-///   of different conditions, such as the availability of a single induction
-///   variable, that all types are supported and vectorize-able, etc.
-/// This code reflects the capabilities of SingleBlockLoopVectorizer.
-/// This class is also used by SingleBlockLoopVectorizer for identifying
-/// induction variable and the different reduction variables.
-class LoopVectorizationLegality {
-public:
-  LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl):
-  TheLoop(Lp), SE(Se), DL(Dl), Induction(0) { }
-
-  /// This represents the kinds of reductions that we support.
-  enum ReductionKind {
-    NoReduction, /// Not a reduction.
-    IntegerAdd,  /// Sum of numbers.
-    IntegerMult, /// Product of numbers.
-    IntegerOr,   /// Bitwise or logical OR of numbers.
-    IntegerAnd,  /// Bitwise or logical AND of numbers.
-    IntegerXor   /// Bitwise or logical XOR of numbers.
-  };
-
-  /// This POD struct holds information about reduction variables.
-  struct ReductionDescriptor {
-    // Default C'tor
-    ReductionDescriptor():
-    StartValue(0), LoopExitInstr(0), Kind(NoReduction) {}
-
-    // C'tor.
-    ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K):
-    StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
-
-    // The starting value of the reduction.
-    // It does not have to be zero!
-    Value *StartValue;
-    // The instruction who's value is used outside the loop.
-    Instruction *LoopExitInstr;
-    // The kind of the reduction.
-    ReductionKind Kind;
-  };
-
-  // This POD struct holds information about the memory runtime legality
-  // check that a group of pointers do not overlap.
-  struct RuntimePointerCheck {
-    RuntimePointerCheck(): Need(false) {}
-
-    /// Reset the state of the pointer runtime information.
-    void reset() {
-      Need = false;
-      Pointers.clear();
-      Starts.clear();
-      Ends.clear();
-    }
-
-    /// Insert a pointer and calculate the start and end SCEVs.
-    void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr) {
-      const SCEV *Sc = SE->getSCEV(Ptr);
-      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
-      assert(AR && "Invalid addrec expression");
-      const SCEV *Ex = SE->getExitCount(Lp, Lp->getHeader());
-      const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
-      Pointers.push_back(Ptr);
-      Starts.push_back(AR->getStart());
-      Ends.push_back(ScEnd);
-    }
-
-    /// This flag indicates if we need to add the runtime check.
-    bool Need;
-    /// Holds the pointers that we need to check.
-    SmallVector<Value*, 2> Pointers;
-    /// Holds the pointer value at the beginning of the loop.
-    SmallVector<const SCEV*, 2> Starts;
-    /// Holds the pointer value at the end of the loop.
-    SmallVector<const SCEV*, 2> Ends;
-  };
-
-  /// ReductionList contains the reduction descriptors for all
-  /// of the reductions that were found in the loop.
-  typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
-
-  /// InductionList saves induction variables and maps them to the initial
-  /// value entring the loop.
-  typedef DenseMap<PHINode*, Value*> InductionList;
-
-  /// Returns true if it is legal to vectorize this loop.
-  /// This does not mean that it is profitable to vectorize this
-  /// loop, only that it is legal to do so.
-  bool canVectorize();
-
-  /// Returns the Induction variable.
-  PHINode *getInduction() {return Induction;}
-
-  /// Returns the reduction variables found in the loop.
-  ReductionList *getReductionVars() { return &Reductions; }
-
-  /// Returns the induction variables found in the loop.
-  InductionList *getInductionVars() { return &Inductions; }
-
-  /// Check if this  pointer is consecutive when vectorizing. This happens
-  /// when the last index of the GEP is the induction variable, or that the
-  /// pointer itself is an induction variable.
-  /// This check allows us to vectorize A[idx] into a wide load/store.
-  bool isConsecutivePtr(Value *Ptr);
-
-  /// Returns true if the value V is uniform within the loop.
-  bool isUniform(Value *V);
-
-  /// Returns true if this instruction will remain scalar after vectorization.
-  bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);}
-
-  /// Returns the information that we collected about runtime memory check.
-  RuntimePointerCheck *getRuntimePointerCheck() {return &PtrRtCheck; }
-private:
-  /// Check if a single basic block loop is vectorizable.
-  /// At this point we know that this is a loop with a constant trip count
-  /// and we only need to check individual instructions.
-  bool canVectorizeBlock(BasicBlock &BB);
-
-  /// When we vectorize loops we may change the order in which
-  /// we read and write from memory. This method checks if it is
-  /// legal to vectorize the code, considering only memory constrains.
-  /// Returns true if BB is vectorizable
-  bool canVectorizeMemory(BasicBlock &BB);
-
-  /// Returns True, if 'Phi' is the kind of reduction variable for type
-  /// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
-  bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
-  /// Returns true if the instruction I can be a reduction variable of type
-  /// 'Kind'.
-  bool isReductionInstr(Instruction *I, ReductionKind Kind);
-  /// Returns True, if 'Phi' is an induction variable.
-  bool isInductionVariable(PHINode *Phi);
-  /// Return true if can compute the address bounds of Ptr within the loop.
-  bool hasComputableBounds(Value *Ptr);
-
-  /// The loop that we evaluate.
-  Loop *TheLoop;
-  /// Scev analysis.
-  ScalarEvolution *SE;
-  /// DataLayout analysis.
-  DataLayout *DL;
-
-  //  ---  vectorization state --- //
-
-  /// Holds the integer induction variable. This is the counter of the
-  /// loop.
-  PHINode *Induction;
-  /// Holds the reduction variables.
-  ReductionList Reductions;
-  /// Holds all of the induction variables that we found in the loop.
-  /// Notice that inductions don't need to start at zero and that induction
-  /// variables can be pointers.
-  InductionList Inductions;
-
-  /// Allowed outside users. This holds the reduction
-  /// vars which can be accessed from outside the loop.
-  SmallPtrSet<Value*, 4> AllowedExit;
-  /// This set holds the variables which are known to be uniform after
-  /// vectorization.
-  SmallPtrSet<Instruction*, 4> Uniforms;
-  /// We need to check that all of the pointers in this list are disjoint
-  /// at runtime.
-  RuntimePointerCheck PtrRtCheck;
-};
-
-/// LoopVectorizationCostModel - estimates the expected speedups due to
-/// vectorization.
-/// In many cases vectorization is not profitable. This can happen because
-/// of a number of reasons. In this class we mainly attempt to predict
-/// the expected speedup/slowdowns due to the supported instruction set.
-/// We use the VectorTargetTransformInfo to query the different backends
-/// for the cost of different operations.
-class LoopVectorizationCostModel {
-public:
-  /// C'tor.
-  LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
-                             LoopVectorizationLegality *Leg,
-                             const VectorTargetTransformInfo *Vtti):
-  TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
-
-  /// Returns the most profitable vectorization factor for the loop that is
-  /// smaller or equal to the VF argument. This method checks every power
-  /// of two up to VF.
-  unsigned findBestVectorizationFactor(unsigned VF = MaxVectorSize);
-
-private:
-  /// Returns the expected execution cost. The unit of the cost does
-  /// not matter because we use the 'cost' units to compare different
-  /// vector widths. The cost that is returned is *not* normalized by
-  /// the factor width.
-  unsigned expectedCost(unsigned VF);
-
-  /// Returns the execution time cost of an instruction for a given vector
-  /// width. Vector width of one means scalar.
-  unsigned getInstructionCost(Instruction *I, unsigned VF);
-
-  /// A helper function for converting Scalar types to vector types.
-  /// If the incoming type is void, we return void. If the VF is 1, we return
-  /// the scalar type.
-  static Type* ToVectorTy(Type *Scalar, unsigned VF);
-
-  /// The loop that we evaluate.
-  Loop *TheLoop;
-  /// Scev analysis.
-  ScalarEvolution *SE;
-
-  /// Vectorization legality.
-  LoopVectorizationLegality *Legal;
-  /// Vector target information.
-  const VectorTargetTransformInfo *VTTI;
-};
-
+/// The LoopVectorize Pass.
 struct LoopVectorize : public LoopPass {
   static char ID; // Pass identification, replacement for typeid
 
@@ -460,7 +78,7 @@ struct LoopVectorize : public LoopPass {
           L->getHeader()->getParent()->getName() << "\"\n");
 
     // Check if it is legal to vectorize the loop.
-    LoopVectorizationLegality LVL(L, SE, DL);
+    LoopVectorizationLegality LVL(L, SE, DL, DT);
     if (!LVL.canVectorize()) {
       DEBUG(dbgs() << "LV: Not vectorizing.\n");
       return false;
@@ -491,7 +109,7 @@ struct LoopVectorize : public LoopPass {
           "\n");
 
     // If we decided that it is *legal* to vectorizer the loop then do it.
-    SingleBlockLoopVectorizer LB(L, SE, LI, DT, DL, &LPM, VF);
+    InnerLoopVectorizer LB(L, SE, LI, DT, DL, VF);
     LB.vectorize(&LVL);
 
     DEBUG(verifyFunction(*L->getHeader()->getParent()));
@@ -511,11 +129,44 @@ struct LoopVectorize : public LoopPass {
 
 };
 
-Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
+}// namespace
+
+//===----------------------------------------------------------------------===//
+// Implementation of LoopVectorizationLegality, InnerLoopVectorizer and
+// LoopVectorizationCostModel.
+//===----------------------------------------------------------------------===//
+
+void
+LoopVectorizationLegality::RuntimePointerCheck::insert(ScalarEvolution *SE,
+                                                       Loop *Lp, Value *Ptr) {
+  const SCEV *Sc = SE->getSCEV(Ptr);
+  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
+  assert(AR && "Invalid addrec expression");
+  const SCEV *Ex = SE->getExitCount(Lp, Lp->getLoopLatch());
+  const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
+  Pointers.push_back(Ptr);
+  Starts.push_back(AR->getStart());
+  Ends.push_back(ScEnd);
+}
+
+Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
   // Create the types.
   LLVMContext &C = V->getContext();
   Type *VTy = VectorType::get(V->getType(), VF);
   Type *I32 = IntegerType::getInt32Ty(C);
+
+  // Save the current insertion location.
+  Instruction *Loc = Builder.GetInsertPoint();
+
+  // We need to place the broadcast of invariant variables outside the loop.
+  Instruction *Instr = dyn_cast<Instruction>(V);
+  bool NewInstr = (Instr && Instr->getParent() == LoopVectorBody);
+  bool Invariant = OrigLoop->isLoopInvariant(V) && !NewInstr;
+
+  // Place the code for broadcasting invariant variables in the new preheader.
+  if (Invariant)
+    Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
+
   Constant *Zero = ConstantInt::get(I32, 0);
   Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32, VF));
   Value *UndefVal = UndefValue::get(VTy);
@@ -523,27 +174,28 @@ Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
   Value *SingleElem = Builder.CreateInsertElement(UndefVal, V, Zero);
   // Broadcast the scalar into all locations in the vector.
   Value *Shuf = Builder.CreateShuffleVector(SingleElem, UndefVal, Zeros,
-                                             "broadcast");
-  // We are accessing the induction variable. Make sure to promote the
-  // index for each consecutive SIMD lane. This adds 0,1,2 ... to all lanes.
-  if (V == Induction)
-    return getConsecutiveVector(Shuf);
+                                            "broadcast");
+
+  // Restore the builder insertion point.
+  if (Invariant)
+    Builder.SetInsertPoint(Loc);
+
   return Shuf;
 }
 
-Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) {
+Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, bool Negate) {
   assert(Val->getType()->isVectorTy() && "Must be a vector");
   assert(Val->getType()->getScalarType()->isIntegerTy() &&
          "Elem must be an integer");
   // Create the types.
   Type *ITy = Val->getType()->getScalarType();
   VectorType *Ty = cast<VectorType>(Val->getType());
-  unsigned VLen = Ty->getNumElements();
+  int VLen = Ty->getNumElements();
   SmallVector<Constant*, 8> Indices;
 
   // Create a vector of consecutive numbers from zero to VF.
-  for (unsigned i = 0; i < VLen; ++i)
-    Indices.push_back(ConstantInt::get(ITy, i));
+  for (int i = 0; i < VLen; ++i)
+    Indices.push_back(ConstantInt::get(ITy, Negate ? (-i): i ));
 
   // Add the consecutive indices to the vector value.
   Constant *Cv = ConstantVector::get(Indices);
@@ -554,10 +206,13 @@ Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) {
 bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
   assert(Ptr->getType()->isPointerTy() && "Unexpected non ptr");
 
-  // If this pointer is an induction variable, return it.
+  // If this value is a pointer induction variable we know it is consecutive.
   PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
-  if (Phi && getInductionVars()->count(Phi))
-    return true;
+  if (Phi && Inductions.count(Phi)) {
+    InductionInfo II = Inductions[Phi];
+    if (PtrInduction == II.IK)
+      return true;
+  }
 
   GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
   if (!Gep)
@@ -571,7 +226,7 @@ bool LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
     if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
       return false;
 
-  // We can emit wide load/stores only of the last index is the induction
+  // We can emit wide load/stores only if the last index is the induction
   // variable.
   const SCEV *Last = SE->getSCEV(LastIndex);
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
@@ -590,7 +245,8 @@ bool LoopVectorizationLegality::isUniform(Value *V) {
   return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
 }
 
-Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) {
+Value *InnerLoopVectorizer::getVectorValue(Value *V) {
+  assert(V != Induction && "The new induction variable should not be used.");
   assert(!V->getType()->isVectorTy() && "Can't widen a vector");
   // If we saved a vectorized copy of V, use it.
   Value *&MapEntry = WidenMap[V];
@@ -604,11 +260,11 @@ Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) {
 }
 
 Constant*
-SingleBlockLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) {
+InnerLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) {
   return ConstantVector::getSplat(VF, ConstantInt::get(ScalarTy, Val, true));
 }
 
-void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
+void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
   assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
   // Holds vector parameters or scalars, in case of uniform vals.
   SmallVector<Value*, 8> Params;
@@ -619,7 +275,7 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
 
     // If we are accessing the old induction variable, use the new one.
     if (SrcOp == OldInduction) {
-      Params.push_back(getVectorValue(Induction));
+      Params.push_back(getVectorValue(SrcOp));
       continue;
     }
 
@@ -679,10 +335,10 @@ void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
 }
 
 Value*
-SingleBlockLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
-                                           Instruction *Loc) {
+InnerLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
+                                     Instruction *Loc) {
   LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
-    Legal->getRuntimePointerCheck();
+  Legal->getRuntimePointerCheck();
 
   if (!PtrRtCheck->Need)
     return NULL;
@@ -695,22 +351,21 @@ SingleBlockLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
   SCEVExpander Exp(*SE, "induction");
 
   // Use this type for pointer arithmetic.
-  Type* PtrArithTy = PtrRtCheck->Pointers[0]->getType();
+  Type* PtrArithTy = Type::getInt8PtrTy(Loc->getContext(), 0);
 
-  for (unsigned i=0; i < NumPointers; ++i) {
+  for (unsigned i = 0; i < NumPointers; ++i) {
     Value *Ptr = PtrRtCheck->Pointers[i];
     const SCEV *Sc = SE->getSCEV(Ptr);
 
     if (SE->isLoopInvariant(Sc, OrigLoop)) {
-      DEBUG(dbgs() << "LV1: Adding RT check for a loop invariant ptr:" <<
+      DEBUG(dbgs() << "LV: Adding RT check for a loop invariant ptr:" <<
             *Ptr <<"\n");
       Starts.push_back(Ptr);
       Ends.push_back(Ptr);
     } else {
       DEBUG(dbgs() << "LV: Adding RT check for range:" << *Ptr <<"\n");
 
-      Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i],
-                                       PtrArithTy, Loc);
+      Value *Start = Exp.expandCodeFor(PtrRtCheck->Starts[i], PtrArithTy, Loc);
       Value *End = Exp.expandCodeFor(PtrRtCheck->Ends[i], PtrArithTy, Loc);
       Starts.push_back(Start);
       Ends.push_back(End);
@@ -719,10 +374,16 @@ SingleBlockLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
 
   for (unsigned i = 0; i < NumPointers; ++i) {
     for (unsigned j = i+1; j < NumPointers; ++j) {
+      Instruction::CastOps Op = Instruction::BitCast;
+      Value *Start0 = CastInst::Create(Op, Starts[i], PtrArithTy, "bc", Loc);
+      Value *Start1 = CastInst::Create(Op, Starts[j], PtrArithTy, "bc", Loc);
+      Value *End0 =   CastInst::Create(Op, Ends[i],   PtrArithTy, "bc", Loc);
+      Value *End1 =   CastInst::Create(Op, Ends[j],   PtrArithTy, "bc", Loc);
+
       Value *Cmp0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
-                                    Starts[i], Ends[j], "bound0", Loc);
+                                    Start0, End1, "bound0", Loc);
       Value *Cmp1 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULE,
-                                    Starts[j], Ends[i], "bound1", Loc);
+                                    Start1, End0, "bound1", Loc);
       Value *IsConflict = BinaryOperator::Create(Instruction::And, Cmp0, Cmp1,
                                                  "found.conflict", Loc);
       if (MemoryRuntimeCheck)
@@ -740,32 +401,32 @@ SingleBlockLoopVectorizer::addRuntimeCheck(LoopVectorizationLegality *Legal,
 }
 
 void
-SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
+InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
   /*
    In this function we generate a new loop. The new loop will contain
    the vectorized instructions while the old loop will continue to run the
    scalar remainder.
 
-    [ ] <-- vector loop bypass.
-  /  |
- /   v
-|   [ ]     <-- vector pre header.
-|    |
-|    v
-|   [  ] \
-|   [  ]_|   <-- vector loop.
-|    |
- \   v
+   [ ] <-- vector loop bypass.
+   /  |
+   /   v
+   |   [ ]     <-- vector pre header.
+   |    |
+   |    v
+   |   [  ] \
+   |   [  ]_|   <-- vector loop.
+   |    |
+   \   v
    >[ ]   <--- middle-block.
-  /  |
- /   v
-|   [ ]     <--- new preheader.
-|    |
-|    v
-|   [ ] \
-|   [ ]_|   <-- old scalar loop to handle remainder.
- \   |
-  \  v
+   /  |
+   /   v
+   |   [ ]     <--- new preheader.
+   |    |
+   |    v
+   |   [ ] \
+   |   [ ]_|   <-- old scalar loop to handle remainder.
+   \   |
+   \  v
    >[ ]     <-- exit block.
    ...
    */
@@ -781,10 +442,10 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
   // don't have a single induction variable.
   OldInduction = Legal->getInduction();
   Type *IdxTy = OldInduction ? OldInduction->getType() :
-    DL->getIntPtrType(SE->getContext());
+  DL->getIntPtrType(SE->getContext());
 
   // Find the loop boundaries.
-  const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
+  const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getLoopLatch());
   assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
 
   // Get the total trip count from the count by adding 1.
@@ -803,10 +464,9 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
   // value from the induction PHI node. If we don't have an induction variable
   // then we know that it starts at zero.
   Value *StartIdx = OldInduction ?
-    OldInduction->getIncomingValueForBlock(BypassBlock):
-    ConstantInt::get(IdxTy, 0);
+  OldInduction->getIncomingValueForBlock(BypassBlock):
+  ConstantInt::get(IdxTy, 0);
 
-  assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
   assert(BypassBlock && "Invalid loop structure");
 
   // Generate the code that checks in runtime if arrays overlap.
@@ -815,13 +475,13 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
 
   // Split the single block loop into the two loop structure described above.
   BasicBlock *VectorPH =
-      BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph");
+  BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph");
   BasicBlock *VecBody =
-    VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
+  VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body");
   BasicBlock *MiddleBlock =
-    VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
+  VecBody->splitBasicBlock(VecBody->getTerminator(), "middle.block");
   BasicBlock *ScalarPH =
-    MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
+  MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(), "scalar.ph");
 
   // This is the location in which we add all of the logic for bypassing
   // the new vector loop.
@@ -878,8 +538,8 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
   // PHIs that are left in the scalar version of the loop.
   // The starting values of PHI nodes depend on the counter of the last
   // iteration in the vectorized loop.
-  // If we come from a bypass edge then we need to start from the original start
-  // value.
+  // If we come from a bypass edge then we need to start from the original
+  // start value.
 
   // This variable saves the new starting index for the scalar loop.
   PHINode *ResumeIndex = 0;
@@ -887,27 +547,54 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
   LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
   for (I = List->begin(), E = List->end(); I != E; ++I) {
     PHINode *OrigPhi = I->first;
+    LoopVectorizationLegality::InductionInfo II = I->second;
     PHINode *ResumeVal = PHINode::Create(OrigPhi->getType(), 2, "resume.val",
-                                           MiddleBlock->getTerminator());
+                                         MiddleBlock->getTerminator());
     Value *EndValue = 0;
-    if (OrigPhi->getType()->isIntegerTy()) {
+    switch (II.IK) {
+    case LoopVectorizationLegality::NoInduction:
+      llvm_unreachable("Unknown induction");
+    case LoopVectorizationLegality::IntInduction: {
       // Handle the integer induction counter:
+      assert(OrigPhi->getType()->isIntegerTy() && "Invalid type");
       assert(OrigPhi == OldInduction && "Unknown integer PHI");
       // We know what the end value is.
       EndValue = IdxEndRoundDown;
       // We also know which PHI node holds it.
       ResumeIndex = ResumeVal;
-    } else {
+      break;
+    }
+    case LoopVectorizationLegality::ReverseIntInduction: {
+      // Convert the CountRoundDown variable to the PHI size.
+      unsigned CRDSize = CountRoundDown->getType()->getScalarSizeInBits();
+      unsigned IISize = II.StartValue->getType()->getScalarSizeInBits();
+      Value *CRD = CountRoundDown;
+      if (CRDSize > IISize)
+        CRD = CastInst::Create(Instruction::Trunc, CountRoundDown,
+                               II.StartValue->getType(),
+                               "tr.crd", BypassBlock->getTerminator());
+      else if (CRDSize < IISize)
+        CRD = CastInst::Create(Instruction::SExt, CountRoundDown,
+                               II.StartValue->getType(),
+                               "sext.crd", BypassBlock->getTerminator());
+      // Handle reverse integer induction counter:
+      EndValue = BinaryOperator::CreateSub(II.StartValue, CRD, "rev.ind.end",
+                                           BypassBlock->getTerminator());
+      break;
+    }
+    case LoopVectorizationLegality::PtrInduction: {
       // For pointer induction variables, calculate the offset using
       // the end index.
-      EndValue = GetElementPtrInst::Create(I->second, CountRoundDown,
+      EndValue = GetElementPtrInst::Create(II.StartValue, CountRoundDown,
                                            "ptr.ind.end",
                                            BypassBlock->getTerminator());
+      break;
     }
+    }// end of case
 
     // The new PHI merges the original incoming value, in case of a bypass,
     // or the value at the end of the vectorized loop.
-    ResumeVal->addIncoming(I->second, BypassBlock);
+    ResumeVal->addIncoming(II.StartValue, BypassBlock);
     ResumeVal->addIncoming(EndValue, VecBody);
 
     // Fix the scalar body counter (PHI node).
@@ -956,19 +643,22 @@ SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
   // Get ready to start creating new instructions into the vectorized body.
   Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
 
-  // Register the new loop.
+  // Create and register the new vector loop.
   Loop* Lp = new Loop();
-  LPM->insertLoop(Lp, OrigLoop->getParentLoop());
-
-  Lp->addBasicBlockToLoop(VecBody, LI->getBase());
-
   Loop *ParentLoop = OrigLoop->getParentLoop();
+
+  // Insert the new loop into the loop nest and register the new basic blocks.
   if (ParentLoop) {
+    ParentLoop->addChildLoop(Lp);
     ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
     ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
     ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
+  } else {
+    LI->addTopLevelLoop(Lp);
   }
 
+  Lp->addBasicBlockToLoop(VecBody, LI->getBase());
+
   // Save the state.
   LoopVectorPreHeader = VectorPH;
   LoopScalarPreHeader = ScalarPH;
@@ -1000,8 +690,37 @@ getReductionIdentity(LoopVectorizationLegality::ReductionKind K) {
   }
 }
 
+static bool
+isTriviallyVectorizableIntrinsic(Instruction *Inst) {
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
+  if (!II)
+    return false;
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::sqrt:
+  case Intrinsic::sin:
+  case Intrinsic::cos:
+  case Intrinsic::exp:
+  case Intrinsic::exp2:
+  case Intrinsic::log:
+  case Intrinsic::log10:
+  case Intrinsic::log2:
+  case Intrinsic::fabs:
+  case Intrinsic::floor:
+  case Intrinsic::ceil:
+  case Intrinsic::trunc:
+  case Intrinsic::rint:
+  case Intrinsic::nearbyint:
+  case Intrinsic::pow:
+  case Intrinsic::fma:
+    return true;
+  default:
+    return false;
+  }
+  return false;
+}
+
 void
-SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
+InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
   //===------------------------------------------------===//
   //
   // Notice: any optimization or new instruction that go
@@ -1009,14 +728,13 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
   // the cost-model.
   //
   //===------------------------------------------------===//
-  typedef SmallVector<PHINode*, 4> PhiVector;
   BasicBlock &BB = *OrigLoop->getHeader();
-  Constant *Zero = ConstantInt::get(
-    IntegerType::getInt32Ty(BB.getContext()), 0);
+  Constant *Zero =
+  ConstantInt::get(IntegerType::getInt32Ty(BB.getContext()), 0);
 
   // In order to support reduction variables we need to be able to vectorize
   // Phi nodes. Phi nodes have cycles, so we need to vectorize them in two
-  // steages. First, we create a new vector PHI node with no incoming edges.
+  // stages. First, we create a new vector PHI node with no incoming edges.
   // We use this value when we vectorize all of the instructions that use the
   // PHI. Next, after all of the instructions in the block are complete we
   // add the new incoming edges to the PHI. At this point all of the
@@ -1024,245 +742,17 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
   // construct the PHI.
   PhiVector RdxPHIsToFix;
 
-  // For each instruction in the old loop.
-  for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
-    Instruction *Inst = it;
-
-    switch (Inst->getOpcode()) {
-      case Instruction::Br:
-        // Nothing to do for PHIs and BR, since we already took care of the
-        // loop control flow instructions.
-        continue;
-      case Instruction::PHI:{
-        PHINode* P = cast<PHINode>(Inst);
-        // Handle reduction variables:
-        if (Legal->getReductionVars()->count(P)) {
-          // This is phase one of vectorizing PHIs.
-          Type *VecTy = VectorType::get(Inst->getType(), VF);
-          WidenMap[Inst] = PHINode::Create(VecTy, 2, "vec.phi",
-                                  LoopVectorBody->getFirstInsertionPt());
-          RdxPHIsToFix.push_back(P);
-          continue;
-        }
-
-        // This PHINode must be an induction variable.
-        // Make sure that we know about it.
-        assert(Legal->getInductionVars()->count(P) &&
-               "Not an induction variable");
-
-        if (P->getType()->isIntegerTy()) {
-          assert(P == OldInduction && "Unexpected PHI");
-          WidenMap[Inst] = getBroadcastInstrs(Induction);
-          continue;
-        }
-
-        // Handle pointer inductions.
-        assert(P->getType()->isPointerTy() && "Unexpected type.");
-        Value *StartIdx = OldInduction ?
-          Legal->getInductionVars()->lookup(OldInduction) :
-          ConstantInt::get(Induction->getType(), 0);
-
-        // This is the pointer value coming into the loop.
-        Value *StartPtr = Legal->getInductionVars()->lookup(P);
-
-        // This is the normalized GEP that starts counting at zero.
-        Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
-                                                 "normalized.idx");
+  // Scan the loop in a topological order to ensure that defs are vectorized
+  // before users.
+  LoopBlocksDFS DFS(OrigLoop);
+  DFS.perform(LI);
 
-        // This is the vector of results. Notice that we don't generate vector
-        // geps because scalar geps result in better code.
-        Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
-        for (unsigned int i = 0; i < VF; ++i) {
-          Constant *Idx = ConstantInt::get(Induction->getType(), i);
-          Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
-          Value *SclrGep = Builder.CreateGEP(StartPtr, GlobalIdx, "next.gep");
-          VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
-                                               Builder.getInt32(i),
-                                               "insert.gep");
-        }
+  // Vectorize all of the blocks in the original loop.
+  for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(),
+       be = DFS.endRPO(); bb != be; ++bb)
+    vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);
 
-        WidenMap[Inst] = VecVal;
-        continue;
-      }
-      case Instruction::Add:
-      case Instruction::FAdd:
-      case Instruction::Sub:
-      case Instruction::FSub:
-      case Instruction::Mul:
-      case Instruction::FMul:
-      case Instruction::UDiv:
-      case Instruction::SDiv:
-      case Instruction::FDiv:
-      case Instruction::URem:
-      case Instruction::SRem:
-      case Instruction::FRem:
-      case Instruction::Shl:
-      case Instruction::LShr:
-      case Instruction::AShr:
-      case Instruction::And:
-      case Instruction::Or:
-      case Instruction::Xor: {
-        // Just widen binops.
-        BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
-        Value *A = getVectorValue(Inst->getOperand(0));
-        Value *B = getVectorValue(Inst->getOperand(1));
-
-        // Use this vector value for all users of the original instruction.
-        Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
-        WidenMap[Inst] = V;
-
-        // Update the NSW, NUW and Exact flags.
-        BinaryOperator *VecOp = cast<BinaryOperator>(V);
-        if (isa<OverflowingBinaryOperator>(BinOp)) {
-          VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
-          VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
-        }
-        if (isa<PossiblyExactOperator>(VecOp))
-          VecOp->setIsExact(BinOp->isExact());
-        break;
-      }
-      case Instruction::Select: {
-        // Widen selects.
-        // If the selector is loop invariant we can create a select
-        // instruction with a scalar condition. Otherwise, use vector-select.
-        Value *Cond = Inst->getOperand(0);
-        bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop);
-
-        // The condition can be loop invariant  but still defined inside the
-        // loop. This means that we can't just use the original 'cond' value.
-        // We have to take the 'vectorized' value and pick the first lane.
-        // Instcombine will make this a no-op.
-        Cond = getVectorValue(Cond);
-        if (InvariantCond)
-          Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0));
-
-        Value *Op0 = getVectorValue(Inst->getOperand(1));
-        Value *Op1 = getVectorValue(Inst->getOperand(2));
-        WidenMap[Inst] = Builder.CreateSelect(Cond, Op0, Op1);
-        break;
-      }
-
-      case Instruction::ICmp:
-      case Instruction::FCmp: {
-        // Widen compares. Generate vector compares.
-        bool FCmp = (Inst->getOpcode() == Instruction::FCmp);
-        CmpInst *Cmp = dyn_cast<CmpInst>(Inst);
-        Value *A = getVectorValue(Inst->getOperand(0));
-        Value *B = getVectorValue(Inst->getOperand(1));
-        if (FCmp)
-          WidenMap[Inst] = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
-        else
-          WidenMap[Inst] = Builder.CreateICmp(Cmp->getPredicate(), A, B);
-        break;
-      }
-
-      case Instruction::Store: {
-        // Attempt to issue a wide store.
-        StoreInst *SI = dyn_cast<StoreInst>(Inst);
-        Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
-        Value *Ptr = SI->getPointerOperand();
-        unsigned Alignment = SI->getAlignment();
-
-        assert(!Legal->isUniform(Ptr) &&
-               "We do not allow storing to uniform addresses");
-
-        GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
-
-        // This store does not use GEPs.
-        if (!Legal->isConsecutivePtr(Ptr)) {
-          scalarizeInstruction(Inst);
-          break;
-        }
-
-        if (Gep) {
-          // The last index does not have to be the induction. It can be
-          // consecutive and be a function of the index. For example A[I+1];
-          unsigned NumOperands = Gep->getNumOperands();
-          Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
-          LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
-
-          // Create the new GEP with the new induction variable.
-          GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
-          Gep2->setOperand(NumOperands - 1, LastIndex);
-          Ptr = Builder.Insert(Gep2);
-        } else {
-          // Use the induction element ptr.
-          assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
-          Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
-        }
-        Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
-        Value *Val = getVectorValue(SI->getValueOperand());
-        Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
-        break;
-      }
-      case Instruction::Load: {
-        // Attempt to issue a wide load.
-        LoadInst *LI = dyn_cast<LoadInst>(Inst);
-        Type *RetTy = VectorType::get(LI->getType(), VF);
-        Value *Ptr = LI->getPointerOperand();
-        unsigned Alignment = LI->getAlignment();
-        GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
-
-        // If the pointer is loop invariant or if it is non consecutive,
-        // scalarize the load.
-        bool Con = Legal->isConsecutivePtr(Ptr);
-        if (Legal->isUniform(Ptr) || !Con) {
-          scalarizeInstruction(Inst);
-          break;
-        }
-
-        if (Gep) {
-          // The last index does not have to be the induction. It can be
-          // consecutive and be a function of the index. For example A[I+1];
-          unsigned NumOperands = Gep->getNumOperands();
-          Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
-          LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
-
-          // Create the new GEP with the new induction variable.
-          GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
-          Gep2->setOperand(NumOperands - 1, LastIndex);
-          Ptr = Builder.Insert(Gep2);
-        } else {
-          // Use the induction element ptr.
-          assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
-          Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
-        }
-
-        Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
-        LI = Builder.CreateLoad(Ptr);
-        LI->setAlignment(Alignment);
-        // Use this vector value for all users of the load.
-        WidenMap[Inst] = LI;
-        break;
-      }
-      case Instruction::ZExt:
-      case Instruction::SExt:
-      case Instruction::FPToUI:
-      case Instruction::FPToSI:
-      case Instruction::FPExt:
-      case Instruction::PtrToInt:
-      case Instruction::IntToPtr:
-      case Instruction::SIToFP:
-      case Instruction::UIToFP:
-      case Instruction::Trunc:
-      case Instruction::FPTrunc:
-      case Instruction::BitCast: {
-        /// Vectorize bitcasts.
-        CastInst *CI = dyn_cast<CastInst>(Inst);
-        Value *A = getVectorValue(Inst->getOperand(0));
-        Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
-        WidenMap[Inst] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
-        break;
-      }
-
-      default:
-        /// All other instructions are unsupported. Scalarize them.
-        scalarizeInstruction(Inst);
-        break;
-    }// end of switch.
-  }// end of for_each instr.
-
-  // At this point every instruction in the original loop is widended to
+  // At this point every instruction in the original loop is widened to
   // a vector form. We are almost done. Now, we need to fix the PHI nodes
   // that we vectorized. The PHI nodes are currently empty because we did
   // not want to introduce cycles. Notice that the remaining PHI nodes
@@ -1281,7 +771,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
     assert(Legal->getReductionVars()->count(RdxPhi) &&
            "Unable to find the reduction variable");
     LoopVectorizationLegality::ReductionDescriptor RdxDesc =
-      (*Legal->getReductionVars())[RdxPhi];
+    (*Legal->getReductionVars())[RdxPhi];
 
     // We need to generate a reduction vector from the incoming scalar.
     // To do so, we need to generate the 'identity' vector and overide
@@ -1301,7 +791,7 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
     // This vector is the Identity vector where the first element is the
     // incoming scalar reduction.
     Value *VectorStart = Builder.CreateInsertElement(Identity,
-                                                    RdxDesc.StartValue, Zero);
+                                                     RdxDesc.StartValue, Zero);
 
     // Fix the vector-loop phi.
     // We created the induction variable so we know that the
@@ -1311,8 +801,8 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
     // Reductions do not have to start at zero. They can start with
     // any loop invariant values.
     VecRdxPhi->addIncoming(VectorStart, VecPreheader);
-    unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
-    Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx));
+    Value *Val =
+    getVectorValue(RdxPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
     VecRdxPhi->addIncoming(Val, LoopVectorBody);
 
     // Before each round, move the insertion point right between
@@ -1329,29 +819,29 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 
     // Extract the first scalar.
     Value *Scalar0 =
-      Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
+    Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
     // Extract and reduce the remaining vector elements.
     for (unsigned i=1; i < VF; ++i) {
       Value *Scalar1 =
-        Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
+      Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
       switch (RdxDesc.Kind) {
-        case LoopVectorizationLegality::IntegerAdd:
-          Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
-          break;
-        case LoopVectorizationLegality::IntegerMult:
-          Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
-          break;
-        case LoopVectorizationLegality::IntegerOr:
-          Scalar0 = Builder.CreateOr(Scalar0, Scalar1);
-          break;
-        case LoopVectorizationLegality::IntegerAnd:
-          Scalar0 = Builder.CreateAnd(Scalar0, Scalar1);
-          break;
-        case LoopVectorizationLegality::IntegerXor:
-          Scalar0 = Builder.CreateXor(Scalar0, Scalar1);
-          break;
-        default:
-          llvm_unreachable("Unknown reduction operation");
+      case LoopVectorizationLegality::IntegerAdd:
+        Scalar0 = Builder.CreateAdd(Scalar0, Scalar1, "add.rdx");
+        break;
+      case LoopVectorizationLegality::IntegerMult:
+        Scalar0 = Builder.CreateMul(Scalar0, Scalar1, "mul.rdx");
+        break;
+      case LoopVectorizationLegality::IntegerOr:
+        Scalar0 = Builder.CreateOr(Scalar0, Scalar1, "or.rdx");
+        break;
+      case LoopVectorizationLegality::IntegerAnd:
+        Scalar0 = Builder.CreateAnd(Scalar0, Scalar1, "and.rdx");
+        break;
+      case LoopVectorizationLegality::IntegerXor:
+        Scalar0 = Builder.CreateXor(Scalar0, Scalar1, "xor.rdx");
+        break;
+      default:
+        llvm_unreachable("Unknown reduction operation");
       }
     }
 
@@ -1379,15 +869,373 @@ SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 
     // Fix the scalar loop reduction variable with the incoming reduction sum
     // from the vector body and from the backedge value.
-    int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
-    int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block.
+    int IncomingEdgeBlockIdx =
+    (RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch());
+    assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
+    // Pick the other block.
+    int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
     (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
     (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
   }// end of for each redux variable.
 }
 
-void SingleBlockLoopVectorizer::updateAnalysis() {
-  // The original basic block.
+Value *InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
+  assert(std::find(pred_begin(Dst), pred_end(Dst), Src) != pred_end(Dst) &&
+         "Invalid edge");
+
+  Value *SrcMask = createBlockInMask(Src);
+
+  // The terminator has to be a branch inst!
+  BranchInst *BI = dyn_cast<BranchInst>(Src->getTerminator());
+  assert(BI && "Unexpected terminator found");
+
+  Value *EdgeMask = SrcMask;
+  if (BI->isConditional()) {
+    EdgeMask = getVectorValue(BI->getCondition());
+    if (BI->getSuccessor(0) != Dst)
+      EdgeMask = Builder.CreateNot(EdgeMask);
+  }
+
+  return Builder.CreateAnd(EdgeMask, SrcMask);
+}
+
+Value *InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
+  assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
+
+  // Loop incoming mask is all-one.
+  if (OrigLoop->getHeader() == BB) {
+    Value *C = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 1);
+    return getVectorValue(C);
+  }
+
+  // This is the block mask. We OR all incoming edges, and with zero.
+  Value *Zero = ConstantInt::get(IntegerType::getInt1Ty(BB->getContext()), 0);
+  Value *BlockMask = getVectorValue(Zero);
+
+  // For each pred:
+  for (pred_iterator it = pred_begin(BB), e = pred_end(BB); it != e; ++it)
+    BlockMask = Builder.CreateOr(BlockMask, createEdgeMask(*it, BB));
+
+  return BlockMask;
+}
+
+void
+InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
+                                          BasicBlock *BB, PhiVector *PV) {
+  Constant *Zero =
+  ConstantInt::get(IntegerType::getInt32Ty(BB->getContext()), 0);
+
+  // For each instruction in the old loop.
+  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+    switch (it->getOpcode()) {
+    case Instruction::Br:
+      // Nothing to do for PHIs and BR, since we already took care of the
+      // loop control flow instructions.
+      continue;
+    case Instruction::PHI:{
+      PHINode* P = cast<PHINode>(it);
+      // Handle reduction variables:
+      if (Legal->getReductionVars()->count(P)) {
+        // This is phase one of vectorizing PHIs.
+        Type *VecTy = VectorType::get(it->getType(), VF);
+        WidenMap[it] =
+          PHINode::Create(VecTy, 2, "vec.phi",
+                          LoopVectorBody->getFirstInsertionPt());
+        PV->push_back(P);
+        continue;
+      }
+
+      // Check for PHI nodes that are lowered to vector selects.
+      if (P->getParent() != OrigLoop->getHeader()) {
+        // We know that all PHIs in non header blocks are converted into
+        // selects, so we don't have to worry about the insertion order and we
+        // can just use the builder.
+
+        // At this point we generate the predication tree. There may be
+        // duplications since this is a simple recursive scan, but future
+        // optimizations will clean it up.
+        Value *Cond = createBlockInMask(P->getIncomingBlock(0));
+        WidenMap[P] =
+          Builder.CreateSelect(Cond,
+                               getVectorValue(P->getIncomingValue(0)),
+                               getVectorValue(P->getIncomingValue(1)),
+                               "predphi");
+        continue;
+      }
+
+      // This PHINode must be an induction variable.
+      // Make sure that we know about it.
+      assert(Legal->getInductionVars()->count(P) &&
+             "Not an induction variable");
+
+      LoopVectorizationLegality::InductionInfo II =
+        Legal->getInductionVars()->lookup(P);
+
+      switch (II.IK) {
+      case LoopVectorizationLegality::NoInduction:
+        llvm_unreachable("Unknown induction");
+      case LoopVectorizationLegality::IntInduction: {
+        assert(P == OldInduction && "Unexpected PHI");
+        Value *Broadcasted = getBroadcastInstrs(Induction);
+        // After broadcasting the induction variable we need to make the
+        // vector consecutive by adding 0, 1, 2 ...
+        Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted);
+        WidenMap[OldInduction] = ConsecutiveInduction;
+        continue;
+      }
+      case LoopVectorizationLegality::ReverseIntInduction:
+      case LoopVectorizationLegality::PtrInduction:
+        // Handle reverse integer and pointer inductions.
+        Value *StartIdx = 0;
+        // If we have a single integer induction variable then use it.
+        // Otherwise, start counting at zero.
+        if (OldInduction) {
+          LoopVectorizationLegality::InductionInfo OldII =
+            Legal->getInductionVars()->lookup(OldInduction);
+          StartIdx = OldII.StartValue;
+        } else {
+          StartIdx = ConstantInt::get(Induction->getType(), 0);
+        }
+        // This is the normalized GEP that starts counting at zero.
+        Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
+                                                 "normalized.idx");
+
+        // Handle the reverse integer induction variable case.
+        if (LoopVectorizationLegality::ReverseIntInduction == II.IK) {
+          IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
+          Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
+                                                 "resize.norm.idx");
+          Value *ReverseInd  = Builder.CreateSub(II.StartValue, CNI,
+                                                 "reverse.idx");
+
+          // This is a new value so do not hoist it out.
+          Value *Broadcasted = getBroadcastInstrs(ReverseInd);
+          // After broadcasting the induction variable we need to make the
+          // vector consecutive by adding  ... -3, -2, -1, 0.
+          Value *ConsecutiveInduction = getConsecutiveVector(Broadcasted,
+                                                             true);
+          WidenMap[it] = ConsecutiveInduction;
+          continue;
+        }
+
+        // Handle the pointer induction variable case.
+        assert(P->getType()->isPointerTy() && "Unexpected type.");
+
+        // This is the vector of results. Notice that we don't generate
+        // vector geps because scalar geps result in better code.
+        Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
+        for (unsigned int i = 0; i < VF; ++i) {
+          Constant *Idx = ConstantInt::get(Induction->getType(), i);
+          Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx,
+                                               "gep.idx");
+          Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
+                                             "next.gep");
+          VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
+                                               Builder.getInt32(i),
+                                               "insert.gep");
+        }
+
+        WidenMap[it] = VecVal;
+        continue;
+      }
+
+    }// End of PHI.
+
+    case Instruction::Add:
+    case Instruction::FAdd:
+    case Instruction::Sub:
+    case Instruction::FSub:
+    case Instruction::Mul:
+    case Instruction::FMul:
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::FDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+    case Instruction::FRem:
+    case Instruction::Shl:
+    case Instruction::LShr:
+    case Instruction::AShr:
+    case Instruction::And:
+    case Instruction::Or:
+    case Instruction::Xor: {
+      // Just widen binops.
+      BinaryOperator *BinOp = dyn_cast<BinaryOperator>(it);
+      Value *A = getVectorValue(it->getOperand(0));
+      Value *B = getVectorValue(it->getOperand(1));
+
+      // Use this vector value for all users of the original instruction.
+      Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+      WidenMap[it] = V;
+
+      // Update the NSW, NUW and Exact flags.
+      BinaryOperator *VecOp = cast<BinaryOperator>(V);
+      if (isa<OverflowingBinaryOperator>(BinOp)) {
+        VecOp->setHasNoSignedWrap(BinOp->hasNoSignedWrap());
+        VecOp->setHasNoUnsignedWrap(BinOp->hasNoUnsignedWrap());
+      }
+      if (isa<PossiblyExactOperator>(VecOp))
+        VecOp->setIsExact(BinOp->isExact());
+      break;
+    }
+    case Instruction::Select: {
+      // Widen selects.
+      // If the selector is loop invariant we can create a select
+      // instruction with a scalar condition. Otherwise, use vector-select.
+      Value *Cond = it->getOperand(0);
+      bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop);
+
+      // The condition can be loop invariant  but still defined inside the
+      // loop. This means that we can't just use the original 'cond' value.
+      // We have to take the 'vectorized' value and pick the first lane.
+      // Instcombine will make this a no-op.
+      Cond = getVectorValue(Cond);
+      if (InvariantCond)
+        Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0));
+
+      Value *Op0 = getVectorValue(it->getOperand(1));
+      Value *Op1 = getVectorValue(it->getOperand(2));
+      WidenMap[it] = Builder.CreateSelect(Cond, Op0, Op1);
+      break;
+    }
+
+    case Instruction::ICmp:
+    case Instruction::FCmp: {
+      // Widen compares. Generate vector compares.
+      bool FCmp = (it->getOpcode() == Instruction::FCmp);
+      CmpInst *Cmp = dyn_cast<CmpInst>(it);
+      Value *A = getVectorValue(it->getOperand(0));
+      Value *B = getVectorValue(it->getOperand(1));
+      if (FCmp)
+        WidenMap[it] = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
+      else
+        WidenMap[it] = Builder.CreateICmp(Cmp->getPredicate(), A, B);
+      break;
+    }
+
+    case Instruction::Store: {
+      // Attempt to issue a wide store.
+      StoreInst *SI = dyn_cast<StoreInst>(it);
+      Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
+      Value *Ptr = SI->getPointerOperand();
+      unsigned Alignment = SI->getAlignment();
+
+      assert(!Legal->isUniform(Ptr) &&
+             "We do not allow storing to uniform addresses");
+
+      GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
+      // This store does not use GEPs.
+      if (!Legal->isConsecutivePtr(Ptr)) {
+        scalarizeInstruction(it);
+        break;
+      }
+
+      if (Gep) {
+        // The last index does not have to be the induction. It can be
+        // consecutive and be a function of the index. For example A[I+1];
+        unsigned NumOperands = Gep->getNumOperands();
+        Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
+        LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+        // Create the new GEP with the new induction variable.
+        GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+        Gep2->setOperand(NumOperands - 1, LastIndex);
+        Ptr = Builder.Insert(Gep2);
+      } else {
+        // Use the induction element ptr.
+        assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+        Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
+      }
+      Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
+      Value *Val = getVectorValue(SI->getValueOperand());
+      Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
+      break;
+    }
+    case Instruction::Load: {
+      // Attempt to issue a wide load.
+      LoadInst *LI = dyn_cast<LoadInst>(it);
+      Type *RetTy = VectorType::get(LI->getType(), VF);
+      Value *Ptr = LI->getPointerOperand();
+      unsigned Alignment = LI->getAlignment();
+      GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
+      // If the pointer is loop invariant or if it is non consecutive,
+      // scalarize the load.
+      bool Con = Legal->isConsecutivePtr(Ptr);
+      if (Legal->isUniform(Ptr) || !Con) {
+        scalarizeInstruction(it);
+        break;
+      }
+
+      if (Gep) {
+        // The last index does not have to be the induction. It can be
+        // consecutive and be a function of the index. For example A[I+1];
+        unsigned NumOperands = Gep->getNumOperands();
+        Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands -1));
+        LastIndex = Builder.CreateExtractElement(LastIndex, Zero);
+
+        // Create the new GEP with the new induction variable.
+        GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+        Gep2->setOperand(NumOperands - 1, LastIndex);
+        Ptr = Builder.Insert(Gep2);
+      } else {
+        // Use the induction element ptr.
+        assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
+        Ptr = Builder.CreateExtractElement(getVectorValue(Ptr), Zero);
+      }
+
+      Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
+      LI = Builder.CreateLoad(Ptr);
+      LI->setAlignment(Alignment);
+      // Use this vector value for all users of the load.
+      WidenMap[it] = LI;
+      break;
+    }
+    case Instruction::ZExt:
+    case Instruction::SExt:
+    case Instruction::FPToUI:
+    case Instruction::FPToSI:
+    case Instruction::FPExt:
+    case Instruction::PtrToInt:
+    case Instruction::IntToPtr:
+    case Instruction::SIToFP:
+    case Instruction::UIToFP:
+    case Instruction::Trunc:
+    case Instruction::FPTrunc:
+    case Instruction::BitCast: {
+      /// Vectorize bitcasts.
+      CastInst *CI = dyn_cast<CastInst>(it);
+      Value *A = getVectorValue(it->getOperand(0));
+      Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
+      WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
+      break;
+    }
+
+    case Instruction::Call: {
+      assert(isTriviallyVectorizableIntrinsic(it));
+      Module *M = BB->getParent()->getParent();
+      IntrinsicInst *II = cast<IntrinsicInst>(it);
+      Intrinsic::ID ID = II->getIntrinsicID();
+      SmallVector<Value*, 4> Args;
+      for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i)
+        Args.push_back(getVectorValue(II->getArgOperand(i)));
+      Type *Tys[] = { VectorType::get(II->getType()->getScalarType(), VF) };
+      Function *F = Intrinsic::getDeclaration(M, ID, Tys);
+      WidenMap[it] = Builder.CreateCall(F, Args);
+      break;
+    }
+
+    default:
+      // All other instructions are unsupported. Scalarize them.
+      scalarizeInstruction(it);
+      break;
+    }// end of switch.
+  }// end of for_each instr.
+}
+
+void InnerLoopVectorizer::updateAnalysis() {
+  // Forget the original basic block.
   SE->forgetLoop(OrigLoop);
 
   // Update the dominator tree information.
@@ -1404,45 +1252,90 @@ void SingleBlockLoopVectorizer::updateAnalysis() {
   DEBUG(DT->verifyAnalysis());
 }
 
-bool LoopVectorizationLegality::canVectorize() {
-  if (!TheLoop->getLoopPreheader()) {
-    assert(false && "No preheader!!");
-    DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
+bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
+  if (!EnableIfConversion)
     return false;
+
+  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
+  std::vector<BasicBlock*> &LoopBlocks = TheLoop->getBlocksVector();
+
+  // Collect the blocks that need predication.
+  for (unsigned i = 0, e = LoopBlocks.size(); i < e; ++i) {
+    BasicBlock *BB = LoopBlocks[i];
+
+    // We must have at most two predecessors because we need to convert
+    // all PHIs to selects.
+    unsigned Preds = std::distance(pred_begin(BB), pred_end(BB));
+    if (Preds > 2)
+      return false;
+
+    // We must be able to predicate all blocks that need to be predicated.
+    if (blockNeedsPredication(BB) && !blockCanBePredicated(BB))
+      return false;
   }
 
-  // We can only vectorize single basic block loops.
+  // We can if-convert this loop.
+  return true;
+}
+
+bool LoopVectorizationLegality::canVectorize() {
+  assert(TheLoop->getLoopPreheader() && "No preheader!!");
+
+  // We can only vectorize innermost loops.
+  if (TheLoop->getSubLoopsVector().size())
+    return false;
+
+  // We must have a single backedge.
+  if (TheLoop->getNumBackEdges() != 1)
+    return false;
+
+  // We must have a single exiting block.
+  if (!TheLoop->getExitingBlock())
+    return false;
+
   unsigned NumBlocks = TheLoop->getNumBlocks();
-  if (NumBlocks != 1) {
-    DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n");
+
+  // Check if we can if-convert non single-bb loops.
+  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
+    DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
     return false;
   }
 
   // We need to have a loop header.
-  BasicBlock *BB = TheLoop->getHeader();
-  DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n");
+  BasicBlock *Latch = TheLoop->getLoopLatch();
+  DEBUG(dbgs() << "LV: Found a loop: " <<
+        TheLoop->getHeader()->getName() << "\n");
 
   // ScalarEvolution needs to be able to find the exit count.
-  const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
+  const SCEV *ExitCount = SE->getExitCount(TheLoop, Latch);
   if (ExitCount == SE->getCouldNotCompute()) {
     DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
     return false;
   }
 
   // Do not loop-vectorize loops with a tiny trip count.
-  unsigned TC = SE->getSmallConstantTripCount(TheLoop, BB);
+  unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch);
   if (TC > 0u && TC < TinyTripCountThreshold) {
     DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
           "This loop is not worth vectorizing.\n");
     return false;
   }
 
+  // Check if we can vectorize the instructions and CFG in this loop.
+  if (!canVectorizeInstrs()) {
+    DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
+    return false;
+  }
+
   // Go over each instruction and look at memory deps.
-  if (!canVectorizeBlock(*BB)) {
-    DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
+  if (!canVectorizeMemory()) {
+    DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
     return false;
   }
 
+  // Collect all of the variables that remain uniform after vectorization.
+  collectLoopUniforms();
+
   DEBUG(dbgs() << "LV: We can vectorize this loop" <<
         (PtrRtCheck.Need ? " (with a runtime bound check)" : "")
         <<"!\n");
@@ -1453,137 +1346,152 @@ bool LoopVectorizationLegality::canVectorize() {
   return true;
 }
 
-bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
-
+bool LoopVectorizationLegality::canVectorizeInstrs() {
   BasicBlock *PreHeader = TheLoop->getLoopPreheader();
+  BasicBlock *Header = TheLoop->getHeader();
 
-  // Scan the instructions in the block and look for hazards.
-  for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
-    Instruction *I = it;
-
-    if (PHINode *Phi = dyn_cast<PHINode>(I)) {
-      // This should not happen because the loop should be normalized.
-      if (Phi->getNumIncomingValues() != 2) {
-        DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
-        return false;
-      }
+  // For each block in the loop.
+  for (Loop::block_iterator bb = TheLoop->block_begin(),
+       be = TheLoop->block_end(); bb != be; ++bb) {
 
-      // This is the value coming from the preheader.
-      Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
+    // Scan the instructions in the block and look for hazards.
+    for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+         ++it) {
 
-      // We only look at integer and pointer phi nodes.
-      if (Phi->getType()->isPointerTy() && isInductionVariable(Phi)) {
-        DEBUG(dbgs() << "LV: Found a pointer induction variable.\n");
-        Inductions[Phi] = StartValue;
-        continue;
-      } else if (!Phi->getType()->isIntegerTy()) {
-        DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
-        return false;
-      }
+      if (PHINode *Phi = dyn_cast<PHINode>(it)) {
+        // This should not happen because the loop should be normalized.
+        if (Phi->getNumIncomingValues() != 2) {
+          DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+          return false;
+        }
 
-      // Handle integer PHIs:
-      if (isInductionVariable(Phi)) {
-        if (Induction) {
-          DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
+        // Check that this PHI type is allowed.
+        if (!Phi->getType()->isIntegerTy() &&
+            !Phi->getType()->isPointerTy()) {
+          DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
           return false;
         }
-        DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n");
-        Induction = Phi;
-        Inductions[Phi] = StartValue;
-        continue;
-      }
-      if (AddReductionVar(Phi, IntegerAdd)) {
-        DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
-        continue;
-      }
-      if (AddReductionVar(Phi, IntegerMult)) {
-        DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
-        continue;
-      }
-      if (AddReductionVar(Phi, IntegerOr)) {
-        DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
-        continue;
-      }
-      if (AddReductionVar(Phi, IntegerAnd)) {
-        DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
-        continue;
-      }
-      if (AddReductionVar(Phi, IntegerXor)) {
-        DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
-        continue;
-      }
 
-      DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
-      return false;
-    }// end of PHI handling
+        // If this PHINode is not in the header block, then we know that we
+        // can convert it to select during if-conversion. No need to check if
+        // the PHIs in this block are induction or reduction variables.
+        if (*bb != Header)
+          continue;
 
-    // We still don't handle functions.
-    CallInst *CI = dyn_cast<CallInst>(I);
-    if (CI) {
-      DEBUG(dbgs() << "LV: Found a call site.\n");
-      return false;
-    }
+        // This is the value coming from the preheader.
+        Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
+        // Check if this is an induction variable.
+        InductionKind IK = isInductionVariable(Phi);
+
+        if (NoInduction != IK) {
+          // Int inductions are special because we only allow one IV.
+          if (IK == IntInduction) {
+            if (Induction) {
+              DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
+              return false;
+            }
+            Induction = Phi;
+          }
+
+          DEBUG(dbgs() << "LV: Found an induction variable.\n");
+          Inductions[Phi] = InductionInfo(StartValue, IK);
+          continue;
+        }
 
-    // We do not re-vectorize vectors.
-    if (!VectorType::isValidElementType(I->getType()) &&
-        !I->getType()->isVoidTy()) {
-      DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
-      return false;
-    }
+        if (AddReductionVar(Phi, IntegerAdd)) {
+          DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
+        if (AddReductionVar(Phi, IntegerMult)) {
+          DEBUG(dbgs() << "LV: Found a MUL reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
+        if (AddReductionVar(Phi, IntegerOr)) {
+          DEBUG(dbgs() << "LV: Found an OR reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
+        if (AddReductionVar(Phi, IntegerAnd)) {
+          DEBUG(dbgs() << "LV: Found an AND reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
+        if (AddReductionVar(Phi, IntegerXor)) {
+          DEBUG(dbgs() << "LV: Found a XOR reduction PHI."<< *Phi <<"\n");
+          continue;
+        }
 
-    // Reduction instructions are allowed to have exit users.
-    // All other instructions must not have external users.
-    if (!AllowedExit.count(I))
-      //Check that all of the users of the loop are inside the BB.
-      for (Value::use_iterator it = I->use_begin(), e = I->use_end();
-           it != e; ++it) {
-        Instruction *U = cast<Instruction>(*it);
-        // This user may be a reduction exit value.
-        BasicBlock *Parent = U->getParent();
-        if (Parent != &BB) {
-          DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
-          return false;
+        DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
+        return false;
+      }// end of PHI handling
+
+      // We still don't handle functions.
+      CallInst *CI = dyn_cast<CallInst>(it);
+      if (CI && !isTriviallyVectorizableIntrinsic(it)) {
+        DEBUG(dbgs() << "LV: Found a call site.\n");
+        return false;
+      }
+
+      // We do not re-vectorize vectors.
+      if (!VectorType::isValidElementType(it->getType()) &&
+          !it->getType()->isVoidTy()) {
+        DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
+        return false;
+      }
+
+      // Reduction instructions are allowed to have exit users.
+      // All other instructions must not have external users.
+      if (!AllowedExit.count(it))
+        //Check that all of the users of the loop are inside the BB.
+        for (Value::use_iterator I = it->use_begin(), E = it->use_end();
+             I != E; ++I) {
+          Instruction *U = cast<Instruction>(*I);
+          // This user may be a reduction exit value.
+          if (!TheLoop->contains(U)) {
+            DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+            return false;
+          }
         }
-    }
-  } // next instr.
+    } // next instr.
+
+  }
 
   if (!Induction) {
     DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
     assert(getInductionVars()->size() && "No induction variables");
   }
 
-  // Don't vectorize if the memory dependencies do not allow vectorization.
-  if (!canVectorizeMemory(BB))
-    return false;
+  return true;
+}
 
+void LoopVectorizationLegality::collectLoopUniforms() {
   // We now know that the loop is vectorizable!
   // Collect variables that will remain uniform after vectorization.
   std::vector<Value*> Worklist;
+  BasicBlock *Latch = TheLoop->getLoopLatch();
 
   // Start with the conditional branch and walk up the block.
-  Worklist.push_back(BB.getTerminator()->getOperand(0));
+  Worklist.push_back(Latch->getTerminator()->getOperand(0));
 
   while (Worklist.size()) {
     Instruction *I = dyn_cast<Instruction>(Worklist.back());
     Worklist.pop_back();
 
-    // Look at instructions inside this block. Stop when reaching PHI nodes.
-    if (!I || I->getParent() != &BB || isa<PHINode>(I))
+    // Look at instructions inside this loop.
+    // Stop when reaching PHI nodes.
+    // TODO: we need to follow values all over the loop, not only in this block.
+    if (!I || !TheLoop->contains(I) || isa<PHINode>(I))
       continue;
 
     // This is a known uniform.
     Uniforms.insert(I);
 
     // Insert all operands.
-    for (int i=0, Op = I->getNumOperands(); i < Op; ++i) {
+    for (int i = 0, Op = I->getNumOperands(); i < Op; ++i) {
       Worklist.push_back(I->getOperand(i));
     }
   }
-
-  return true;
 }
 
-bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
+bool LoopVectorizationLegality::canVectorizeMemory() {
   typedef SmallVector<Value*, 16> ValueVector;
   typedef SmallPtrSet<Value*, 16> ValueSet;
   // Holds the Load and Store *instructions*.
@@ -1592,35 +1500,40 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
   PtrRtCheck.Pointers.clear();
   PtrRtCheck.Need = false;
 
-  // Scan the BB and collect legal loads and stores.
-  for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
-    Instruction *I = it;
-
-    // If this is a load, save it. If this instruction can read from memory
-    // but is not a load, then we quit. Notice that we don't handle function
-    // calls that read or write.
-    if (I->mayReadFromMemory()) {
-      LoadInst *Ld = dyn_cast<LoadInst>(I);
-      if (!Ld) return false;
-      if (!Ld->isSimple()) {
-        DEBUG(dbgs() << "LV: Found a non-simple load.\n");
-        return false;
+  // For each block.
+  for (Loop::block_iterator bb = TheLoop->block_begin(),
+       be = TheLoop->block_end(); bb != be; ++bb) {
+
+    // Scan the BB and collect legal loads and stores.
+    for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
+         ++it) {
+
+      // If this is a load, save it. If this instruction can read from memory
+      // but is not a load, then we quit. Notice that we don't handle function
+      // calls that read or write.
+      if (it->mayReadFromMemory()) {
+        LoadInst *Ld = dyn_cast<LoadInst>(it);
+        if (!Ld) return false;
+        if (!Ld->isSimple()) {
+          DEBUG(dbgs() << "LV: Found a non-simple load.\n");
+          return false;
+        }
+        Loads.push_back(Ld);
+        continue;
       }
-      Loads.push_back(Ld);
-      continue;
-    }
 
-    // Save store instructions. Abort if other instructions write to memory.
-    if (I->mayWriteToMemory()) {
-      StoreInst *St = dyn_cast<StoreInst>(I);
-      if (!St) return false;
-      if (!St->isSimple()) {
-        DEBUG(dbgs() << "LV: Found a non-simple store.\n");
-        return false;
+      // Save 'store' instructions. Abort if other instructions write to memory.
+      if (it->mayWriteToMemory()) {
+        StoreInst *St = dyn_cast<StoreInst>(it);
+        if (!St) return false;
+        if (!St->isSimple()) {
+          DEBUG(dbgs() << "LV: Found a non-simple store.\n");
+          return false;
+        }
+        Stores.push_back(St);
       }
-      Stores.push_back(St);
-    }
-  } // next instr.
+    } // next instr.
+  } // next block.
 
   // Now we have two lists that hold the loads and the stores.
   // Next, we find the pointers that they use.
@@ -1628,8 +1541,8 @@ bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
   // Check if we see any stores. If there are no stores, then we don't
   // care if the pointers are *restrict*.
   if (!Stores.size()) {
-        DEBUG(dbgs() << "LV: Found a read-only loop!\n");
-        return true;
+    DEBUG(dbgs() << "LV: Found a read-only loop!\n");
+    return true;
   }
 
   // Holds the read and read-write *pointers* that we find.
@@ -1770,11 +1683,13 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
   if (Phi->getNumIncomingValues() != 2)
     return false;
 
-  // Find the possible incoming reduction variable.
-  BasicBlock *BB = Phi->getParent();
-  int SelfEdgeIdx = Phi->getBasicBlockIndex(BB);
-  int InEdgeBlockIdx = (SelfEdgeIdx ? 0 : 1); // The other entry.
-  Value *RdxStart = Phi->getIncomingValue(InEdgeBlockIdx);
+  // Reduction variables are only found in the loop header block.
+  if (Phi->getParent() != TheLoop->getHeader())
+    return false;
+
+  // Obtain the reduction start value from the value that comes from the loop
+  // preheader.
+  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
 
   // ExitInstruction is the single value which is used outside the loop.
   // We only allow for a single reduction value to be used outside the loop.
@@ -1789,20 +1704,20 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
   // Also, we can't have multiple block-local users.
   Instruction *Iter = Phi;
   while (true) {
+    // If the instruction has no users then this is a broken
+    // chain and can't be a reduction variable.
+    if (Iter->use_empty())
+      return false;
+
     // Any reduction instr must be of one of the allowed kinds.
     if (!isReductionInstr(Iter, Kind))
       return false;
 
-    // Did we found a user inside this block ?
+    // Did we find a user inside this block ?
     bool FoundInBlockUser = false;
     // Did we reach the initial PHI node ?
     bool FoundStartPHI = false;
 
-    // If the instruction has no users then this is a broken
-    // chain and can't be a reduction variable.
-    if (Iter->use_empty())
-      return false;
-
     // For each of the *users* of iter.
     for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
          it != e; ++it) {
@@ -1812,14 +1727,23 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
         FoundStartPHI = true;
         continue;
       }
+
       // Check if we found the exit user.
       BasicBlock *Parent = U->getParent();
-      if (Parent != BB) {
-        // We must have a single exit instruction.
+      if (!TheLoop->contains(Parent)) {
+        // Exit if you find multiple outside users.
         if (ExitInstruction != 0)
           return false;
         ExitInstruction = Iter;
       }
+
+      // We allow in-loop PHINodes which are not the original reduction PHI
+      // node. If this PHI is the only user of Iter (happens in IF w/ no ELSE
+      // structure) then don't skip this PHI.
+      if (isa<PHINode>(U) && U->getParent() != TheLoop->getHeader() &&
+          TheLoop->contains(U) && Iter->getNumUses() > 1)
+        continue;
+
       // We can't have multiple inside users.
       if (FoundInBlockUser)
         return false;
@@ -1830,67 +1754,110 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
     // We found a reduction var if we have reached the original
     // phi node and we only have a single instruction with out-of-loop
     // users.
-   if (FoundStartPHI && ExitInstruction) {
-     // This instruction is allowed to have out-of-loop users.
-     AllowedExit.insert(ExitInstruction);
-
-     // Save the description of this reduction variable.
-     ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
-     Reductions[Phi] = RD;
-     return true;
-   }
+    if (FoundStartPHI && ExitInstruction) {
+      // This instruction is allowed to have out-of-loop users.
+      AllowedExit.insert(ExitInstruction);
+
+      // Save the description of this reduction variable.
+      ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
+      Reductions[Phi] = RD;
+      return true;
+    }
+
+    // If we've reached the start PHI but did not find an outside user then
+    // this is dead code. Abort.
+    if (FoundStartPHI)
+      return false;
   }
 }
 
 bool
 LoopVectorizationLegality::isReductionInstr(Instruction *I,
                                             ReductionKind Kind) {
-    switch (I->getOpcode()) {
-    default:
-      return false;
-    case Instruction::PHI:
-      // possibly.
-      return true;
-    case Instruction::Add:
-    case Instruction::Sub:
-      return Kind == IntegerAdd;
-    case Instruction::Mul:
-      return Kind == IntegerMult;
-    case Instruction::And:
-      return Kind == IntegerAnd;
-    case Instruction::Or:
-      return Kind == IntegerOr;
-    case Instruction::Xor:
-      return Kind == IntegerXor;
-    }
+  switch (I->getOpcode()) {
+  default:
+    return false;
+  case Instruction::PHI:
+    // possibly.
+    return true;
+  case Instruction::Add:
+  case Instruction::Sub:
+    return Kind == IntegerAdd;
+  case Instruction::Mul:
+    return Kind == IntegerMult;
+  case Instruction::And:
+    return Kind == IntegerAnd;
+  case Instruction::Or:
+    return Kind == IntegerOr;
+  case Instruction::Xor:
+    return Kind == IntegerXor;
+  }
 }
 
-bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
+LoopVectorizationLegality::InductionKind
+LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
   Type *PhiTy = Phi->getType();
   // We only handle integer and pointer inductions variables.
   if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
-    return false;
+    return NoInduction;
 
   // Check that the PHI is consecutive and starts at zero.
   const SCEV *PhiScev = SE->getSCEV(Phi);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
   if (!AR) {
     DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
-    return false;
+    return NoInduction;
   }
   const SCEV *Step = AR->getStepRecurrence(*SE);
 
   // Integer inductions need to have a stride of one.
-  if (PhiTy->isIntegerTy())
-    return Step->isOne();
+  if (PhiTy->isIntegerTy()) {
+    if (Step->isOne())
+      return IntInduction;
+    if (Step->isAllOnesValue())
+      return ReverseIntInduction;
+    return NoInduction;
+  }
 
   // Calculate the pointer stride and check if it is consecutive.
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
-  if (!C) return false;
+  if (!C)
+    return NoInduction;
 
   assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
   uint64_t Size = DL->getTypeAllocSize(PhiTy->getPointerElementType());
-  return (C->getValue()->equalsInt(Size));
+  if (C->getValue()->equalsInt(Size))
+    return PtrInduction;
+
+  return NoInduction;
+}
+
+bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB)  {
+  assert(TheLoop->contains(BB) && "Unknown block used");
+
+  // Blocks that do not dominate the latch need predication.
+  BasicBlock* Latch = TheLoop->getLoopLatch();
+  return !DT->dominates(BB, Latch);
+}
+
+bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB) {
+  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+    // We don't predicate loads/stores at the moment.
+    if (it->mayReadFromMemory() || it->mayWriteToMemory() || it->mayThrow())
+      return false;
+
+    // The isntructions below can trap.
+    switch (it->getOpcode()) {
+    default: continue;
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+             return false;
+    }
+  }
+
+  return true;
 }
 
 bool LoopVectorizationLegality::hasComputableBounds(Value *Ptr) {
@@ -1930,19 +1897,29 @@ LoopVectorizationCostModel::findBestVectorizationFactor(unsigned VF) {
 }
 
 unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
-  // We can only estimate the cost of single basic block loops.
-  assert(1 == TheLoop->getNumBlocks() && "Too many blocks in loop");
-
-  BasicBlock *BB = TheLoop->getHeader();
   unsigned Cost = 0;
 
-  // For each instruction in the old loop.
-  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
-    Instruction *Inst = it;
-    unsigned C = getInstructionCost(Inst, VF);
-    Cost += C;
-    DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF "<< VF <<
-          " For instruction: "<< *Inst << "\n");
+  // For each block.
+  for (Loop::block_iterator bb = TheLoop->block_begin(),
+       be = TheLoop->block_end(); bb != be; ++bb) {
+    unsigned BlockCost = 0;
+    BasicBlock *BB = *bb;
+
+    // For each instruction in the old loop.
+    for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
+      unsigned C = getInstructionCost(it, VF);
+      Cost += C;
+      DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " <<
+            VF << " For instruction: "<< *it << "\n");
+    }
+
+    // We assume that if-converted blocks have a 50% chance of being executed.
+    // When the code is scalar then some of the blocks are avoided due to CF.
+    // When the code is vectorized we execute all code paths.
+    if (Legal->blockNeedsPredication(*bb) && VF == 1)
+      BlockCost /= 2;
+
+    Cost += BlockCost;
   }
 
   return Cost;
@@ -1960,147 +1937,156 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
   Type *RetTy = I->getType();
   Type *VectorTy = ToVectorTy(RetTy, VF);
 
-
   // TODO: We need to estimate the cost of intrinsic calls.
   switch (I->getOpcode()) {
-    case Instruction::GetElementPtr:
-      // We mark this instruction as zero-cost because scalar GEPs are usually
-      // lowered to the intruction addressing mode. At the moment we don't
-      // generate vector geps.
-      return 0;
-    case Instruction::Br: {
-      return VTTI->getCFInstrCost(I->getOpcode());
-    }
-    case Instruction::PHI:
-      return 0;
-    case Instruction::Add:
-    case Instruction::FAdd:
-    case Instruction::Sub:
-    case Instruction::FSub:
-    case Instruction::Mul:
-    case Instruction::FMul:
-    case Instruction::UDiv:
-    case Instruction::SDiv:
-    case Instruction::FDiv:
-    case Instruction::URem:
-    case Instruction::SRem:
-    case Instruction::FRem:
-    case Instruction::Shl:
-    case Instruction::LShr:
-    case Instruction::AShr:
-    case Instruction::And:
-    case Instruction::Or:
-    case Instruction::Xor: {
-      return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy);
-    }
-    case Instruction::Select: {
-      SelectInst *SI = cast<SelectInst>(I);
-      const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
-      bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
-      Type *CondTy = SI->getCondition()->getType();
-      if (ScalarCond)
-        CondTy = VectorType::get(CondTy, VF);
-
-      return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
-    }
-    case Instruction::ICmp:
-    case Instruction::FCmp: {
-      Type *ValTy = I->getOperand(0)->getType();
-      VectorTy = ToVectorTy(ValTy, VF);
-      return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy);
-    }
-    case Instruction::Store: {
-      StoreInst *SI = cast<StoreInst>(I);
-      Type *ValTy = SI->getValueOperand()->getType();
-      VectorTy = ToVectorTy(ValTy, VF);
-
-      if (VF == 1)
-        return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
-                              SI->getAlignment(), SI->getPointerAddressSpace());
-
-      // Scalarized stores.
-      if (!Legal->isConsecutivePtr(SI->getPointerOperand())) {
-        unsigned Cost = 0;
-        unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
-                                              ValTy);
-        // The cost of extracting from the value vector.
-        Cost += VF * (ExtCost);
-        // The cost of the scalar stores.
-        Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
-                                           ValTy->getScalarType(),
-                                           SI->getAlignment(),
-                                           SI->getPointerAddressSpace());
-        return Cost;
-      }
-
-      // Wide stores.
-      return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(),
+  case Instruction::GetElementPtr:
+    // We mark this instruction as zero-cost because scalar GEPs are usually
+    // lowered to the intruction addressing mode. At the moment we don't
+    // generate vector geps.
+    return 0;
+  case Instruction::Br: {
+    return VTTI->getCFInstrCost(I->getOpcode());
+  }
+  case Instruction::PHI:
+    //TODO: IF-converted IFs become selects.
+    return 0;
+  case Instruction::Add:
+  case Instruction::FAdd:
+  case Instruction::Sub:
+  case Instruction::FSub:
+  case Instruction::Mul:
+  case Instruction::FMul:
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::FDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+  case Instruction::FRem:
+  case Instruction::Shl:
+  case Instruction::LShr:
+  case Instruction::AShr:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:
+    return VTTI->getArithmeticInstrCost(I->getOpcode(), VectorTy);
+  case Instruction::Select: {
+    SelectInst *SI = cast<SelectInst>(I);
+    const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
+    bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
+    Type *CondTy = SI->getCondition()->getType();
+    if (ScalarCond)
+      CondTy = VectorType::get(CondTy, VF);
+
+    return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
+  }
+  case Instruction::ICmp:
+  case Instruction::FCmp: {
+    Type *ValTy = I->getOperand(0)->getType();
+    VectorTy = ToVectorTy(ValTy, VF);
+    return VTTI->getCmpSelInstrCost(I->getOpcode(), VectorTy);
+  }
+  case Instruction::Store: {
+    StoreInst *SI = cast<StoreInst>(I);
+    Type *ValTy = SI->getValueOperand()->getType();
+    VectorTy = ToVectorTy(ValTy, VF);
+
+    if (VF == 1)
+      return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
+                                   SI->getAlignment(),
                                    SI->getPointerAddressSpace());
+
+    // Scalarized stores.
+    if (!Legal->isConsecutivePtr(SI->getPointerOperand())) {
+      unsigned Cost = 0;
+      unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+                                            ValTy);
+      // The cost of extracting from the value vector.
+      Cost += VF * (ExtCost);
+      // The cost of the scalar stores.
+      Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
+                                         ValTy->getScalarType(),
+                                         SI->getAlignment(),
+                                         SI->getPointerAddressSpace());
+      return Cost;
     }
-    case Instruction::Load: {
-      LoadInst *LI = cast<LoadInst>(I);
-
-      if (VF == 1)
-        return VTTI->getMemoryOpCost(I->getOpcode(), RetTy,
-                                     LI->getAlignment(),
-                                     LI->getPointerAddressSpace());
-
-      // Scalarized loads.
-      if (!Legal->isConsecutivePtr(LI->getPointerOperand())) {
-        unsigned Cost = 0;
-        unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
-        // The cost of inserting the loaded value into the result vector.
-        Cost += VF * (InCost);
-        // The cost of the scalar stores.
-        Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
-                                           RetTy->getScalarType(),
-                                           LI->getAlignment(),
-                                           LI->getPointerAddressSpace());
-        return Cost;
-      }
 
-      // Wide loads.
-      return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
+    // Wide stores.
+    return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, SI->getAlignment(),
+                                 SI->getPointerAddressSpace());
+  }
+  case Instruction::Load: {
+    LoadInst *LI = cast<LoadInst>(I);
+
+    if (VF == 1)
+      return VTTI->getMemoryOpCost(I->getOpcode(), RetTy,
+                                   LI->getAlignment(),
                                    LI->getPointerAddressSpace());
-    }
-    case Instruction::ZExt:
-    case Instruction::SExt:
-    case Instruction::FPToUI:
-    case Instruction::FPToSI:
-    case Instruction::FPExt:
-    case Instruction::PtrToInt:
-    case Instruction::IntToPtr:
-    case Instruction::SIToFP:
-    case Instruction::UIToFP:
-    case Instruction::Trunc:
-    case Instruction::FPTrunc:
-    case Instruction::BitCast: {
-      Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
-      return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
-    }
-    default: {
-      // We are scalarizing the instruction. Return the cost of the scalar
-      // instruction, plus the cost of insert and extract into vector
-      // elements, times the vector width.
+
+    // Scalarized loads.
+    if (!Legal->isConsecutivePtr(LI->getPointerOperand())) {
       unsigned Cost = 0;
+      unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
+      // The cost of inserting the loaded value into the result vector.
+      Cost += VF * (InCost);
+      // The cost of the scalar stores.
+      Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
+                                         RetTy->getScalarType(),
+                                         LI->getAlignment(),
+                                         LI->getPointerAddressSpace());
+      return Cost;
+    }
+
+    // Wide loads.
+    return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy, LI->getAlignment(),
+                                 LI->getPointerAddressSpace());
+  }
+  case Instruction::ZExt:
+  case Instruction::SExt:
+  case Instruction::FPToUI:
+  case Instruction::FPToSI:
+  case Instruction::FPExt:
+  case Instruction::PtrToInt:
+  case Instruction::IntToPtr:
+  case Instruction::SIToFP:
+  case Instruction::UIToFP:
+  case Instruction::Trunc:
+  case Instruction::FPTrunc:
+  case Instruction::BitCast: {
+    Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF);
+    return VTTI->getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy);
+  }
+  case Instruction::Call: {
+    assert(isTriviallyVectorizableIntrinsic(I));
+    IntrinsicInst *II = cast<IntrinsicInst>(I);
+    Type *RetTy = ToVectorTy(II->getType(), VF);
+    SmallVector<Type*, 4> Tys;
+    for (unsigned i = 0, ie = II->getNumArgOperands(); i != ie; ++i)
+      Tys.push_back(ToVectorTy(II->getArgOperand(i)->getType(), VF));
+    return VTTI->getIntrinsicInstrCost(II->getIntrinsicID(), RetTy, Tys);
+  }
+  default: {
+    // We are scalarizing the instruction. Return the cost of the scalar
+    // instruction, plus the cost of insert and extract into vector
+    // elements, times the vector width.
+    unsigned Cost = 0;
 
-      bool IsVoid = RetTy->isVoidTy();
+    bool IsVoid = RetTy->isVoidTy();
 
-      unsigned InsCost = (IsVoid ? 0 :
-                          VTTI->getInstrCost(Instruction::InsertElement,
-                                             VectorTy));
+    unsigned InsCost = (IsVoid ? 0 :
+                        VTTI->getInstrCost(Instruction::InsertElement,
+                                           VectorTy));
 
-      unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
-                                            VectorTy);
+    unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+                                          VectorTy);
 
-      // The cost of inserting the results plus extracting each one of the
-      // operands.
-      Cost += VF * (InsCost + ExtCost * I->getNumOperands());
+    // The cost of inserting the results plus extracting each one of the
+    // operands.
+    Cost += VF * (InsCost + ExtCost * I->getNumOperands());
 
-      // The cost of executing VF copies of the scalar instruction.
-      Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy);
-      return Cost;
-    }
+    // The cost of executing VF copies of the scalar instruction.
+    Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy);
+    return Cost;
+  }
   }// end of switch.
 }
 
@@ -2110,8 +2096,6 @@ Type* LoopVectorizationCostModel::ToVectorTy(Type *Scalar, unsigned VF) {
   return VectorType::get(Scalar, VF);
 }
 
-} // namespace
-
 char LoopVectorize::ID = 0;
 static const char lv_name[] = "Loop Vectorization";
 INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
@@ -2126,3 +2110,4 @@ namespace llvm {
   }
 }
 
+
diff --git a/lib/Transforms/Vectorize/LoopVectorize.h b/lib/Transforms/Vectorize/LoopVectorize.h
new file mode 100644
index 0000000000..9d6d80e22b
--- /dev/null
+++ b/lib/Transforms/Vectorize/LoopVectorize.h
@@ -0,0 +1,458 @@
+//===- LoopVectorize.h --- A Loop Vectorizer ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
+// and generates target-independent LLVM-IR. Legalization of the IR is done
+// in the codegen. However, the vectorizes uses (will use) the codegen
+// interfaces to generate IR that is likely to result in an optimal binary.
+//
+// The loop vectorizer combines consecutive loop iteration into a single
+// 'wide' iteration. After this transformation the index is incremented
+// by the SIMD vector width, and not by one.
+//
+// This pass has three parts:
+// 1. The main loop pass that drives the different parts.
+// 2. LoopVectorizationLegality - A unit that checks for the legality
+//    of the vectorization.
+// 3. InnerLoopVectorizer - A unit that performs the actual
+//    widening of instructions.
+// 4. LoopVectorizationCostModel - A unit that checks for the profitability
+//    of vectorization. It decides on the optimal vector width, which
+//    can be one, if vectorization is not profitable.
+//
+//===----------------------------------------------------------------------===//
+//
+// The reduction-variable vectorization is based on the paper:
+//  D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
+//
+// Variable uniformity checks are inspired by:
+// Karrenberg, R. and Hack, S. Whole Function Vectorization.
+//
+// Other ideas/concepts are from:
+//  A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
+//
+//  S. Maleki, Y. Gao, M. Garzaran, T. Wong and D. Padua.  An Evaluation of
+//  Vectorizing Compilers.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORM_VECTORIZE_LOOP_VECTORIZE_H
+#define LLVM_TRANSFORM_VECTORIZE_LOOP_VECTORIZE_H
+
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/IRBuilder.h" 
+
+#include <algorithm>
+using namespace llvm;
+
+/// We don't vectorize loops with a known constant trip count below this number.
+const unsigned TinyTripCountThreshold = 16;
+
+/// When performing a runtime memory check, do not check more than this
+/// number of pointers. Notice that the check is quadratic!
+const unsigned RuntimeMemoryCheckThreshold = 4;
+
+/// This is the highest vector width that we try to generate.
+const unsigned MaxVectorSize = 8;
+
+namespace llvm {
+
+// Forward declarations.
+class LoopVectorizationLegality;
+class LoopVectorizationCostModel;
+class VectorTargetTransformInfo;
+
+/// InnerLoopVectorizer vectorizes loops which contain only one basic
+/// block to a specified vectorization factor (VF).
+/// This class performs the widening of scalars into vectors, or multiple
+/// scalars. This class also implements the following features:
+/// * It inserts an epilogue loop for handling loops that don't have iteration
+///   counts that are known to be a multiple of the vectorization factor.
+/// * It handles the code generation for reduction variables.
+/// * Scalarization (implementation using scalars) of un-vectorizable
+///   instructions.
+/// InnerLoopVectorizer does not perform any vectorization-legality
+/// checks, and relies on the caller to check for the different legality
+/// aspects. The InnerLoopVectorizer relies on the
+/// LoopVectorizationLegality class to provide information about the induction
+/// and reduction variables that were found to a given vectorization factor.
+class InnerLoopVectorizer {
+public:
+  /// Ctor.
+  InnerLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li,
+                      DominatorTree *Dt, DataLayout *Dl, unsigned VecWidth):
+  OrigLoop(Orig), SE(Se), LI(Li), DT(Dt), DL(Dl), VF(VecWidth),
+  Builder(Se->getContext()), Induction(0), OldInduction(0) { }
+
+  // Perform the actual loop widening (vectorization).
+  void vectorize(LoopVectorizationLegality *Legal) {
+    // Create a new empty loop. Unlink the old loop and connect the new one.
+    createEmptyLoop(Legal);
+    // Widen each instruction in the old loop to a new one in the new loop.
+    // Use the Legality module to find the induction and reduction variables.
+    vectorizeLoop(Legal);
+    // Register the new loop and update the analysis passes.
+    updateAnalysis();
+  }
+
+private:
+  /// A small list of PHINodes.
+  typedef SmallVector<PHINode*, 4> PhiVector;
+
+  /// Add code that checks at runtime if the accessed arrays overlap.
+  /// Returns the comparator value or NULL if no check is needed.
+  Value *addRuntimeCheck(LoopVectorizationLegality *Legal,
+                         Instruction *Loc);
+  /// Create an empty loop, based on the loop ranges of the old loop.
+  void createEmptyLoop(LoopVectorizationLegality *Legal);
+  /// Copy and widen the instructions from the old loop.
+  void vectorizeLoop(LoopVectorizationLegality *Legal);
+
+  /// A helper function that computes the predicate of the block BB, assuming
+  /// that the header block of the loop is set to True. It returns the *entry*
+  /// mask for the block BB.
+  Value *createBlockInMask(BasicBlock *BB);
+  /// A helper function that computes the predicate of the edge between SRC
+  /// and DST.
+  Value *createEdgeMask(BasicBlock *Src, BasicBlock *Dst);
+
+  /// A helper function to vectorize a single BB within the innermost loop.
+  void vectorizeBlockInLoop(LoopVectorizationLegality *Legal, BasicBlock *BB,
+                            PhiVector *PV);
+
+  /// Insert the new loop to the loop hierarchy and pass manager
+  /// and update the analysis passes.
+  void updateAnalysis();
+
+  /// This instruction is un-vectorizable. Implement it as a sequence
+  /// of scalars.
+  void scalarizeInstruction(Instruction *Instr);
+
+  /// Create a broadcast instruction. This method generates a broadcast
+  /// instruction (shuffle) for loop invariant values and for the induction
+  /// value. If this is the induction variable then we extend it to N, N+1, ...
+  /// this is needed because each iteration in the loop corresponds to a SIMD
+  /// element.
+  Value *getBroadcastInstrs(Value *V);
+
+  /// This function adds 0, 1, 2 ... to each vector element, starting at zero.
+  /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
+  Value *getConsecutiveVector(Value* Val, bool Negate = false);
+
+  /// When we go over instructions in the basic block we rely on previous
+  /// values within the current basic block or on loop invariant values.
+  /// When we widen (vectorize) values we place them in the map. If the values
+  /// are not within the map, they have to be loop invariant, so we simply
+  /// broadcast them into a vector.
+  Value *getVectorValue(Value *V);
+
+  /// Get a uniform vector of constant integers. We use this to get
+  /// vectors of ones and zeros for the reduction code.
+  Constant* getUniformVector(unsigned Val, Type* ScalarTy);
+
+  typedef DenseMap<Value*, Value*> ValueMap;
+
+  /// The original loop.
+  Loop *OrigLoop;
+  // Scev analysis to use.
+  ScalarEvolution *SE;
+  // Loop Info.
+  LoopInfo *LI;
+  // Dominator Tree.
+  DominatorTree *DT;
+  // Data Layout.
+  DataLayout *DL;
+  // The vectorization factor to use.
+  unsigned VF;
+
+  // The builder that we use
+  IRBuilder<> Builder;
+
+  // --- Vectorization state ---
+
+  /// The vector-loop preheader.
+  BasicBlock *LoopVectorPreHeader;
+  /// The scalar-loop preheader.
+  BasicBlock *LoopScalarPreHeader;
+  /// Middle Block between the vector and the scalar.
+  BasicBlock *LoopMiddleBlock;
+  ///The ExitBlock of the scalar loop.
+  BasicBlock *LoopExitBlock;
+  ///The vector loop body.
+  BasicBlock *LoopVectorBody;
+  ///The scalar loop body.
+  BasicBlock *LoopScalarBody;
+  ///The first bypass block.
+  BasicBlock *LoopBypassBlock;
+
+  /// The new Induction variable which was added to the new block.
+  PHINode *Induction;
+  /// The induction variable of the old basic block.
+  PHINode *OldInduction;
+  // Maps scalars to widened vectors.
+  ValueMap WidenMap;
+};
+
+/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
+/// to what vectorization factor.
+/// This class does not look at the profitability of vectorization, only the
+/// legality. This class has two main kinds of checks:
+/// * Memory checks - The code in canVectorizeMemory checks if vectorization
+///   will change the order of memory accesses in a way that will change the
+///   correctness of the program.
+/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
+/// checks for a number of different conditions, such as the availability of a
+/// single induction variable, that all types are supported and vectorize-able,
+/// etc. This code reflects the capabilities of InnerLoopVectorizer.
+/// This class is also used by InnerLoopVectorizer for identifying
+/// induction variable and the different reduction variables.
+class LoopVectorizationLegality {
+public:
+  LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl,
+                            DominatorTree *Dt):
+  TheLoop(Lp), SE(Se), DL(Dl), DT(Dt), Induction(0) { }
+
+  /// This enum represents the kinds of reductions that we support.
+  enum ReductionKind {
+    NoReduction, /// Not a reduction.
+    IntegerAdd,  /// Sum of numbers.
+    IntegerMult, /// Product of numbers.
+    IntegerOr,   /// Bitwise or logical OR of numbers.
+    IntegerAnd,  /// Bitwise or logical AND of numbers.
+    IntegerXor   /// Bitwise or logical XOR of numbers.
+  };
+
+  /// This enum represents the kinds of inductions that we support.
+  enum InductionKind {
+    NoInduction,         /// Not an induction variable.
+    IntInduction,        /// Integer induction variable. Step = 1.
+    ReverseIntInduction, /// Reverse int induction variable. Step = -1.
+    PtrInduction         /// Pointer induction variable. Step = sizeof(elem).
+  };
+
+  /// This POD struct holds information about reduction variables.
+  struct ReductionDescriptor {
+    // Default C'tor
+    ReductionDescriptor():
+    StartValue(0), LoopExitInstr(0), Kind(NoReduction) {}
+
+    // C'tor.
+    ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K):
+    StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
+
+    // The starting value of the reduction.
+    // It does not have to be zero!
+    Value *StartValue;
+    // The instruction who's value is used outside the loop.
+    Instruction *LoopExitInstr;
+    // The kind of the reduction.
+    ReductionKind Kind;
+  };
+
+  // This POD struct holds information about the memory runtime legality
+  // check that a group of pointers do not overlap.
+  struct RuntimePointerCheck {
+    RuntimePointerCheck(): Need(false) {}
+
+    /// Reset the state of the pointer runtime information.
+    void reset() {
+      Need = false;
+      Pointers.clear();
+      Starts.clear();
+      Ends.clear();
+    }
+
+    /// Insert a pointer and calculate the start and end SCEVs.
+    void insert(ScalarEvolution *SE, Loop *Lp, Value *Ptr);
+
+    /// This flag indicates if we need to add the runtime check.
+    bool Need;
+    /// Holds the pointers that we need to check.
+    SmallVector<Value*, 2> Pointers;
+    /// Holds the pointer value at the beginning of the loop.
+    SmallVector<const SCEV*, 2> Starts;
+    /// Holds the pointer value at the end of the loop.
+    SmallVector<const SCEV*, 2> Ends;
+  };
+
+  /// A POD for saving information about induction variables.
+  struct InductionInfo {
+    /// Ctors.
+    InductionInfo(Value *Start, InductionKind K):
+    StartValue(Start), IK(K) {};
+    InductionInfo(): StartValue(0), IK(NoInduction) {};
+    /// Start value.
+    Value *StartValue;
+    /// Induction kind.
+    InductionKind IK;
+  };
+
+  /// ReductionList contains the reduction descriptors for all
+  /// of the reductions that were found in the loop.
+  typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
+
+  /// InductionList saves induction variables and maps them to the
+  /// induction descriptor.
+  typedef DenseMap<PHINode*, InductionInfo> InductionList;
+
+  /// Returns true if it is legal to vectorize this loop.
+  /// This does not mean that it is profitable to vectorize this
+  /// loop, only that it is legal to do so.
+  bool canVectorize();
+
+  /// Returns the Induction variable.
+  PHINode *getInduction() {return Induction;}
+
+  /// Returns the reduction variables found in the loop.
+  ReductionList *getReductionVars() { return &Reductions; }
+
+  /// Returns the induction variables found in the loop.
+  InductionList *getInductionVars() { return &Inductions; }
+
+  /// Return true if the block BB needs to be predicated in order for the loop
+  /// to be vectorized.
+  bool blockNeedsPredication(BasicBlock *BB);
+
+  /// Check if this  pointer is consecutive when vectorizing. This happens
+  /// when the last index of the GEP is the induction variable, or that the
+  /// pointer itself is an induction variable.
+  /// This check allows us to vectorize A[idx] into a wide load/store.
+  bool isConsecutivePtr(Value *Ptr);
+
+  /// Returns true if the value V is uniform within the loop.
+  bool isUniform(Value *V);
+
+  /// Returns true if this instruction will remain scalar after vectorization.
+  bool isUniformAfterVectorization(Instruction* I) {return Uniforms.count(I);}
+
+  /// Returns the information that we collected about runtime memory check.
+  RuntimePointerCheck *getRuntimePointerCheck() {return &PtrRtCheck; }
+private:
+  /// Check if a single basic block loop is vectorizable.
+  /// At this point we know that this is a loop with a constant trip count
+  /// and we only need to check individual instructions.
+  bool canVectorizeInstrs();
+
+  /// When we vectorize loops we may change the order in which
+  /// we read and write from memory. This method checks if it is
+  /// legal to vectorize the code, considering only memory constrains.
+  /// Returns true if the loop is vectorizable
+  bool canVectorizeMemory();
+
+  /// Return true if we can vectorize this loop using the IF-conversion
+  /// transformation.
+  bool canVectorizeWithIfConvert();
+
+  /// Collect the variables that need to stay uniform after vectorization.
+  void collectLoopUniforms();
+
+  /// Return true if all of the instructions in the block can be speculatively
+  /// executed.
+  bool blockCanBePredicated(BasicBlock *BB);
+
+  /// Returns True, if 'Phi' is the kind of reduction variable for type
+  /// 'Kind'. If this is a reduction variable, it adds it to ReductionList.
+  bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
+  /// Returns true if the instruction I can be a reduction variable of type
+  /// 'Kind'.
+  bool isReductionInstr(Instruction *I, ReductionKind Kind);
+  /// Returns the induction kind of Phi. This function may return NoInduction
+  /// if the PHI is not an induction variable.
+  InductionKind isInductionVariable(PHINode *Phi);
+  /// Return true if can compute the address bounds of Ptr within the loop.
+  bool hasComputableBounds(Value *Ptr);
+
+  /// The loop that we evaluate.
+  Loop *TheLoop;
+  /// Scev analysis.
+  ScalarEvolution *SE;
+  /// DataLayout analysis.
+  DataLayout *DL;
+  // Dominators.
+  DominatorTree *DT;
+
+  //  ---  vectorization state --- //
+
+  /// Holds the integer induction variable. This is the counter of the
+  /// loop.
+  PHINode *Induction;
+  /// Holds the reduction variables.
+  ReductionList Reductions;
+  /// Holds all of the induction variables that we found in the loop.
+  /// Notice that inductions don't need to start at zero and that induction
+  /// variables can be pointers.
+  InductionList Inductions;
+
+  /// Allowed outside users. This holds the reduction
+  /// vars which can be accessed from outside the loop.
+  SmallPtrSet<Value*, 4> AllowedExit;
+  /// This set holds the variables which are known to be uniform after
+  /// vectorization.
+  SmallPtrSet<Instruction*, 4> Uniforms;
+  /// We need to check that all of the pointers in this list are disjoint
+  /// at runtime.
+  RuntimePointerCheck PtrRtCheck;
+};
+
+/// LoopVectorizationCostModel - estimates the expected speedups due to
+/// vectorization.
+/// In many cases vectorization is not profitable. This can happen because
+/// of a number of reasons. In this class we mainly attempt to predict
+/// the expected speedup/slowdowns due to the supported instruction set.
+/// We use the VectorTargetTransformInfo to query the different backends
+/// for the cost of different operations.
+class LoopVectorizationCostModel {
+public:
+  /// C'tor.
+  LoopVectorizationCostModel(Loop *Lp, ScalarEvolution *Se,
+                             LoopVectorizationLegality *Leg,
+                             const VectorTargetTransformInfo *Vtti):
+  TheLoop(Lp), SE(Se), Legal(Leg), VTTI(Vtti) { }
+
+  /// Returns the most profitable vectorization factor for the loop that is
+  /// smaller or equal to the VF argument. This method checks every power
+  /// of two up to VF.
+  unsigned findBestVectorizationFactor(unsigned VF = MaxVectorSize);
+
+private:
+  /// Returns the expected execution cost. The unit of the cost does
+  /// not matter because we use the 'cost' units to compare different
+  /// vector widths. The cost that is returned is *not* normalized by
+  /// the factor width.
+  unsigned expectedCost(unsigned VF);
+
+  /// Returns the execution time cost of an instruction for a given vector
+  /// width. Vector width of one means scalar.
+  unsigned getInstructionCost(Instruction *I, unsigned VF);
+
+  /// A helper function for converting Scalar types to vector types.
+  /// If the incoming type is void, we return void. If the VF is 1, we return
+  /// the scalar type.
+  static Type* ToVectorTy(Type *Scalar, unsigned VF);
+
+  /// The loop that we evaluate.
+  Loop *TheLoop;
+  /// Scev analysis.
+  ScalarEvolution *SE;
+
+  /// Vectorization legality.
+  LoopVectorizationLegality *Legal;
+  /// Vector target information.
+  const VectorTargetTransformInfo *VTTI;
+};
+
+}// namespace llvm
+
+#endif //LLVM_TRANSFORM_VECTORIZE_LOOP_VECTORIZE_H
+
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index d26973a7b3..3fb36cadea 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -13,13 +13,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm-c/Transforms/Vectorize.h"
+#include "llvm/Transforms/Vectorize.h"
 #include "llvm-c/Initialization.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/PassManager.h"
+#include "llvm-c/Transforms/Vectorize.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/Verifier.h"
-#include "llvm/Transforms/Vectorize.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/PassManager.h"
 
 using namespace llvm;
 
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index c45a04f12b..4ad08ff8a2 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -15,29 +15,29 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Assembly/Writer.h"
-#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Assembly/AssemblyAnnotationWriter.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/Operator.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/TypeFinder.h"
-#include "llvm/ValueSymbolTable.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/TypeFinder.h"
+#include "llvm/ValueSymbolTable.h"
 #include <algorithm>
 #include <cctype>
 using namespace llvm;
@@ -703,6 +703,22 @@ static void writeAtomicRMWOperation(raw_ostream &Out,
 }
 
 static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
+  if (const FPMathOperator *FPO = dyn_cast<const FPMathOperator>(U)) {
+    // Unsafe algebra implies all the others, no need to write them all out
+    if (FPO->hasUnsafeAlgebra())
+      Out << " fast";
+    else {
+      if (FPO->hasNoNaNs())
+        Out << " nnan";
+      if (FPO->hasNoInfs())
+        Out << " ninf";
+      if (FPO->hasNoSignedZeros())
+        Out << " nsz";
+      if (FPO->hasAllowReciprocal())
+        Out << " arcp";
+    }
+  }
+
   if (const OverflowingBinaryOperator *OBO =
         dyn_cast<OverflowingBinaryOperator>(U)) {
     if (OBO->hasNoUnsignedWrap())
@@ -1287,21 +1303,6 @@ void AssemblyWriter::printModule(const Module *M) {
     }
   }
 
-  // Loop over the dependent libraries and emit them.
-  Module::lib_iterator LI = M->lib_begin();
-  Module::lib_iterator LE = M->lib_end();
-  if (LI != LE) {
-    Out << '\n';
-    Out << "deplibs = [ ";
-    while (LI != LE) {
-      Out << '"' << *LI << '"';
-      ++LI;
-      if (LI != LE)
-        Out << ", ";
-    }
-    Out << " ]";
-  }
-
   printTypeIdentities();
 
   // Output all globals.
@@ -1555,7 +1556,7 @@ void AssemblyWriter::printFunction(const Function *F) {
   }
 
   FunctionType *FT = F->getFunctionType();
-  const AttrListPtr &Attrs = F->getAttributes();
+  const AttributeSet &Attrs = F->getAttributes();
   Attributes RetAttrs = Attrs.getRetAttributes();
   if (RetAttrs.hasAttributes())
     Out <<  Attrs.getRetAttributes().getAsString() << ' ';
@@ -1848,7 +1849,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     PointerType *PTy = cast<PointerType>(Operand->getType());
     FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
     Type *RetTy = FTy->getReturnType();
-    const AttrListPtr &PAL = CI->getAttributes();
+    const AttributeSet &PAL = CI->getAttributes();
 
     if (PAL.getRetAttributes().hasAttributes())
       Out << ' ' << PAL.getRetAttributes().getAsString();
@@ -1881,7 +1882,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     PointerType *PTy = cast<PointerType>(Operand->getType());
     FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
     Type *RetTy = FTy->getReturnType();
-    const AttrListPtr &PAL = II->getAttributes();
+    const AttributeSet &PAL = II->getAttributes();
 
     // Print the calling convention being used.
     if (II->getCallingConv() != CallingConv::C) {
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index 55722bcf75..751ff85f21 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -8,21 +8,21 @@
 //===----------------------------------------------------------------------===//
 //
 // This file implements the Attributes, AttributeImpl, AttrBuilder,
-// AttributeListImpl, and AttrListPtr classes.
+// AttributeListImpl, and AttributeSet classes.
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Attributes.h"
 #include "AttributesImpl.h"
 #include "LLVMContextImpl.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Atomic.h"
-#include "llvm/Support/Mutex.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -355,11 +355,11 @@ uint64_t AttributesImpl::getStackAlignment() const {
 // AttributeListImpl Definition
 //===----------------------------------------------------------------------===//
 
-AttrListPtr AttrListPtr::get(LLVMContext &C,
+AttributeSet AttributeSet::get(LLVMContext &C,
                              ArrayRef<AttributeWithIndex> Attrs) {
   // If there are no attributes then return a null AttributesList pointer.
   if (Attrs.empty())
-    return AttrListPtr();
+    return AttributeSet();
 
 #ifndef NDEBUG
   for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
@@ -387,14 +387,14 @@ AttrListPtr AttrListPtr::get(LLVMContext &C,
   }
 
   // Return the AttributesList that we found or created.
-  return AttrListPtr(PA);
+  return AttributeSet(PA);
 }
 
 //===----------------------------------------------------------------------===//
-// AttrListPtr Method Implementations
+// AttributeSet Method Implementations
 //===----------------------------------------------------------------------===//
 
-const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) {
+const AttributeSet &AttributeSet::operator=(const AttributeSet &RHS) {
   if (AttrList == RHS.AttrList) return *this;
 
   AttrList = RHS.AttrList;
@@ -404,13 +404,13 @@ const AttrListPtr &AttrListPtr::operator=(const AttrListPtr &RHS) {
 /// getNumSlots - Return the number of slots used in this attribute list.
 /// This is the number of arguments that have an attribute set on them
 /// (including the function itself).
-unsigned AttrListPtr::getNumSlots() const {
+unsigned AttributeSet::getNumSlots() const {
   return AttrList ? AttrList->Attrs.size() : 0;
 }
 
 /// getSlot - Return the AttributeWithIndex at the specified slot.  This
 /// holds a number plus a set of attributes.
-const AttributeWithIndex &AttrListPtr::getSlot(unsigned Slot) const {
+const AttributeWithIndex &AttributeSet::getSlot(unsigned Slot) const {
   assert(AttrList && Slot < AttrList->Attrs.size() && "Slot # out of range!");
   return AttrList->Attrs[Slot];
 }
@@ -418,7 +418,7 @@ const AttributeWithIndex &AttrListPtr::getSlot(unsigned Slot) const {
 /// getAttributes - The attributes for the specified index are returned.
 /// Attributes for the result are denoted with Idx = 0.  Function notes are
 /// denoted with idx = ~0.
-Attributes AttrListPtr::getAttributes(unsigned Idx) const {
+Attributes AttributeSet::getAttributes(unsigned Idx) const {
   if (AttrList == 0) return Attributes();
 
   const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
@@ -431,7 +431,7 @@ Attributes AttrListPtr::getAttributes(unsigned Idx) const {
 
 /// hasAttrSomewhere - Return true if the specified attribute is set for at
 /// least one parameter or for the return value.
-bool AttrListPtr::hasAttrSomewhere(Attributes::AttrVal Attr) const {
+bool AttributeSet::hasAttrSomewhere(Attributes::AttrVal Attr) const {
   if (AttrList == 0) return false;
 
   const SmallVector<AttributeWithIndex, 4> &Attrs = AttrList->Attrs;
@@ -442,17 +442,17 @@ bool AttrListPtr::hasAttrSomewhere(Attributes::AttrVal Attr) const {
   return false;
 }
 
-unsigned AttrListPtr::getNumAttrs() const {
+unsigned AttributeSet::getNumAttrs() const {
   return AttrList ? AttrList->Attrs.size() : 0;
 }
 
-Attributes &AttrListPtr::getAttributesAtIndex(unsigned i) const {
+Attributes &AttributeSet::getAttributesAtIndex(unsigned i) const {
   assert(AttrList && "Trying to get an attribute from an empty list!");
   assert(i < AttrList->Attrs.size() && "Index out of range!");
   return AttrList->Attrs[i].Attrs;
 }
 
-AttrListPtr AttrListPtr::addAttr(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::addAttr(LLVMContext &C, unsigned Idx,
                                  Attributes Attrs) const {
   Attributes OldAttrs = getAttributes(Idx);
 #ifndef NDEBUG
@@ -497,7 +497,7 @@ AttrListPtr AttrListPtr::addAttr(LLVMContext &C, unsigned Idx,
   return get(C, NewAttrList);
 }
 
-AttrListPtr AttrListPtr::removeAttr(LLVMContext &C, unsigned Idx,
+AttributeSet AttributeSet::removeAttr(LLVMContext &C, unsigned Idx,
                                     Attributes Attrs) const {
 #ifndef NDEBUG
   // FIXME it is not obvious how this should work for alignment.
@@ -505,7 +505,7 @@ AttrListPtr AttrListPtr::removeAttr(LLVMContext &C, unsigned Idx,
   assert(!Attrs.hasAttribute(Attributes::Alignment) &&
          "Attempt to exclude alignment!");
 #endif
-  if (AttrList == 0) return AttrListPtr();
+  if (AttrList == 0) return AttributeSet();
 
   Attributes OldAttrs = getAttributes(Idx);
   AttrBuilder NewAttrs =
@@ -536,7 +536,7 @@ AttrListPtr AttrListPtr::removeAttr(LLVMContext &C, unsigned Idx,
   return get(C, NewAttrList);
 }
 
-void AttrListPtr::dump() const {
+void AttributeSet::dump() const {
   dbgs() << "PAL[ ";
   for (unsigned i = 0; i < getNumSlots(); ++i) {
     const AttributeWithIndex &PAWI = getSlot(i);
diff --git a/lib/VMCore/AttributesImpl.h b/lib/VMCore/AttributesImpl.h
index 5c107e1ebb..193e873717 100644
--- a/lib/VMCore/AttributesImpl.h
+++ b/lib/VMCore/AttributesImpl.h
@@ -15,8 +15,8 @@
 #ifndef LLVM_ATTRIBUTESIMPL_H
 #define LLVM_ATTRIBUTESIMPL_H
 
-#include "llvm/Attributes.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/Attributes.h"
 
 namespace llvm {
 
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
index d353b0adcf..f23a49696e 100644
--- a/lib/VMCore/BasicBlock.cpp
+++ b/lib/VMCore/BasicBlock.cpp
@@ -12,15 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/BasicBlock.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Type.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/LeakDetector.h"
-#include "SymbolTableListTraitsImpl.h"
+#include "llvm/Type.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index fe3edac42e..dedae5a83f 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -18,14 +18,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "ConstantFold.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Constants.h"
-#include "llvm/Instructions.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalAlias.h"
 #include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
 #include "llvm/Operator.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index a4514309b2..008378a241 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -12,26 +12,26 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Constants.h"
-#include "LLVMContextImpl.h"
 #include "ConstantFold.h"
+#include "LLVMContextImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Operator.h"
-#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringMap.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include <algorithm>
 #include <cstdarg>
 using namespace llvm;
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 847bc134dd..0fb37cb0b4 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -17,11 +17,11 @@
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/GlobalVariable.h"
 #include "llvm/GlobalAlias.h"
-#include "llvm/LLVMContext.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/PassManager.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/Debug.h"
@@ -1380,27 +1380,27 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
 
 void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
   Function *Func = unwrap<Function>(Fn);
-  const AttrListPtr PAL = Func->getAttributes();
+  const AttributeSet PAL = Func->getAttributes();
   AttrBuilder B(PA);
-  const AttrListPtr PALnew =
-    PAL.addAttr(Func->getContext(), AttrListPtr::FunctionIndex,
+  const AttributeSet PALnew =
+    PAL.addAttr(Func->getContext(), AttributeSet::FunctionIndex,
                 Attributes::get(Func->getContext(), B));
   Func->setAttributes(PALnew);
 }
 
 void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
   Function *Func = unwrap<Function>(Fn);
-  const AttrListPtr PAL = Func->getAttributes();
+  const AttributeSet PAL = Func->getAttributes();
   AttrBuilder B(PA);
-  const AttrListPtr PALnew =
-    PAL.removeAttr(Func->getContext(), AttrListPtr::FunctionIndex,
+  const AttributeSet PALnew =
+    PAL.removeAttr(Func->getContext(), AttributeSet::FunctionIndex,
                    Attributes::get(Func->getContext(), B));
   Func->setAttributes(PALnew);
 }
 
 LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn) {
   Function *Func = unwrap<Function>(Fn);
-  const AttrListPtr PAL = Func->getAttributes();
+  const AttributeSet PAL = Func->getAttributes();
   Attributes attr = PAL.getFnAttributes();
   return (LLVMAttribute)attr.Raw();
 }
diff --git a/lib/VMCore/DIBuilder.cpp b/lib/VMCore/DIBuilder.cpp
index 152b825523..b3bbde86c5 100644
--- a/lib/VMCore/DIBuilder.cpp
+++ b/lib/VMCore/DIBuilder.cpp
@@ -12,11 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DIBuilder.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Constants.h"
 #include "llvm/DebugInfo.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Module.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 
@@ -741,11 +741,11 @@ DIArray DIBuilder::getOrCreateArray(ArrayRef<Value *> Elements) {
 
 /// getOrCreateSubrange - Create a descriptor for a value range.  This
 /// implicitly uniques the values returned.
-DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) {
+DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) {
   Value *Elts[] = {
     GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
     ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
-    ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
+    ConstantInt::get(Type::getInt64Ty(VMContext), Count)
   };
 
   return DISubrange(MDNode::get(VMContext, Elts));
diff --git a/lib/VMCore/DataLayout.cpp b/lib/VMCore/DataLayout.cpp
index e21868be44..b0ac62686d 100644
--- a/lib/VMCore/DataLayout.cpp
+++ b/lib/VMCore/DataLayout.cpp
@@ -17,16 +17,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DataLayout.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Mutex.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cstdlib>
 using namespace llvm;
@@ -152,13 +152,6 @@ DataLayout::InvalidPointerElem = PointerAlignElem::get(~0U, 0U, 0U, 0U);
 //                       DataLayout Class Implementation
 //===----------------------------------------------------------------------===//
 
-/// getInt - Get an integer ignoring errors.
-static int getInt(StringRef R) {
-  int Result = 0;
-  R.getAsInteger(10, Result);
-  return Result;
-}
-
 void DataLayout::init(StringRef Desc) {
   initializeDataLayoutPass(*PassRegistry::getPassRegistry());
 
@@ -181,25 +174,51 @@ void DataLayout::init(StringRef Desc) {
   setAlignment(AGGREGATE_ALIGN, 0,  8,  0);  // struct
   setPointerAlignment(0, 8, 8, 8);
 
-  std::string errMsg = parseSpecifier(Desc);
-  assert(errMsg == "" && "Invalid target data layout string.");
-  (void)errMsg;
+  parseSpecifier(Desc);
 }
 
-std::string DataLayout::parseSpecifier(StringRef Desc) {
+/// Checked version of split, to ensure mandatory subparts.
+static std::pair<StringRef, StringRef> split(StringRef Str, char Separator) {
+  assert(!Str.empty() && "parse error, string can't be empty here");
+  std::pair<StringRef, StringRef> Split = Str.split(Separator);
+  assert((!Split.second.empty() || Split.first == Str) &&
+         "a trailing separator is not allowed");
+  return Split;
+}
+
+/// Get an unsinged integer, including error checks.
+static unsigned getInt(StringRef R) {
+  unsigned Result;
+  bool error = R.getAsInteger(10, Result); (void)error;
+  assert(!error && "not a number, or does not fit in an unsigned int");
+  return Result;
+}
+
+/// Convert bits into bytes. Assert if not a byte width multiple.
+static unsigned inBytes(unsigned Bits) {
+  assert(Bits % 8 == 0 && "number of bits must be a byte width multiple");
+  return Bits / 8;
+}
+
+void DataLayout::parseSpecifier(StringRef Desc) {
 
   while (!Desc.empty()) {
-    std::pair<StringRef, StringRef> Split = Desc.split('-');
-    StringRef Token = Split.first;
+
+    // Split at '-'.
+    std::pair<StringRef, StringRef> Split = split(Desc, '-');
     Desc = Split.second;
 
-    Split = Token.split(':');
-    StringRef Specifier = Split.first;
-    Token = Split.second;
+    // Split at ':'.
+    Split = split(Split.first, ':');
 
-    assert(!Specifier.empty() && "Can't be empty here");
+    // Aliases used below.
+    StringRef &Tok  = Split.first;  // Current token.
+    StringRef &Rest = Split.second; // The rest of the string.
 
-    switch (Specifier[0]) {
+    char Specifier = Tok.front();
+    Tok = Tok.substr(1);
+
+    switch (Specifier) {
     case 'E':
       LittleEndian = false;
       break;
@@ -207,37 +226,28 @@ std::string DataLayout::parseSpecifier(StringRef Desc) {
       LittleEndian = true;
       break;
     case 'p': {
-      int AddrSpace = 0;
-      if (Specifier.size() > 1) {
-        AddrSpace = getInt(Specifier.substr(1));
-        if (AddrSpace < 0 || AddrSpace > (1 << 24))
-          return "Invalid address space, must be a positive 24bit integer";
-      }
-      Split = Token.split(':');
-      int PointerMemSizeBits = getInt(Split.first);
-      if (PointerMemSizeBits < 0 || PointerMemSizeBits % 8 != 0)
-        return "invalid pointer size, must be a positive 8-bit multiple";
-
-      // Pointer ABI alignment.
-      Split = Split.second.split(':');
-      int PointerABIAlignBits = getInt(Split.first);
-      if (PointerABIAlignBits < 0 || PointerABIAlignBits % 8 != 0) {
-        return "invalid pointer ABI alignment, "
-               "must be a positive 8-bit multiple";
-      }
-
-      // Pointer preferred alignment.
-      Split = Split.second.split(':');
-      int PointerPrefAlignBits = getInt(Split.first);
-      if (PointerPrefAlignBits < 0 || PointerPrefAlignBits % 8 != 0) {
-        return "invalid pointer preferred alignment, "
-               "must be a positive 8-bit multiple";
+      // Address space.
+      unsigned AddrSpace = Tok.empty() ? 0 : getInt(Tok);
+      assert(AddrSpace < 1 << 24 &&
+             "Invalid address space, must be a 24bit integer");
+
+      // Size.
+      Split = split(Rest, ':');
+      unsigned PointerMemSize = inBytes(getInt(Tok));
+
+      // ABI alignment.
+      Split = split(Rest, ':');
+      unsigned PointerABIAlign = inBytes(getInt(Tok));
+
+      // Preferred alignment.
+      unsigned PointerPrefAlign = PointerABIAlign;
+      if (!Rest.empty()) {
+        Split = split(Rest, ':');
+        PointerPrefAlign = inBytes(getInt(Tok));
       }
 
-      if (PointerPrefAlignBits == 0)
-        PointerPrefAlignBits = PointerABIAlignBits;
-      setPointerAlignment(AddrSpace, PointerABIAlignBits/8,
-                          PointerPrefAlignBits/8, PointerMemSizeBits/8);
+      setPointerAlignment(AddrSpace, PointerABIAlign, PointerPrefAlign,
+                          PointerMemSize);
       break;
     }
     case 'i':
@@ -246,8 +256,7 @@ std::string DataLayout::parseSpecifier(StringRef Desc) {
     case 'a':
     case 's': {
       AlignTypeEnum AlignType;
-      char field = Specifier[0];
-      switch (field) {
+      switch (Specifier) {
       default:
       case 'i': AlignType = INTEGER_ALIGN; break;
       case 'v': AlignType = VECTOR_ALIGN; break;
@@ -255,64 +264,44 @@ std::string DataLayout::parseSpecifier(StringRef Desc) {
       case 'a': AlignType = AGGREGATE_ALIGN; break;
       case 's': AlignType = STACK_ALIGN; break;
       }
-      int Size = getInt(Specifier.substr(1));
-      if (Size < 0) {
-        return std::string("invalid ") + field + "-size field, "
-               "must be positive";
-      }
 
-      Split = Token.split(':');
-      int ABIAlignBits = getInt(Split.first);
-      if (ABIAlignBits < 0 || ABIAlignBits % 8 != 0) {
-        return std::string("invalid ") + field +"-abi-alignment field, "
-               "must be a positive 8-bit multiple";
-      }
-      unsigned ABIAlign = ABIAlignBits / 8;
+      // Bit size.
+      unsigned Size = Tok.empty() ? 0 : getInt(Tok);
 
-      Split = Split.second.split(':');
+      // ABI alignment.
+      Split = split(Rest, ':');
+      unsigned ABIAlign = inBytes(getInt(Tok));
 
-      int PrefAlignBits = getInt(Split.first);
-      if (PrefAlignBits < 0 || PrefAlignBits % 8 != 0) {
-        return std::string("invalid ") + field +"-preferred-alignment field, "
-               "must be a positive 8-bit multiple";
+      // Preferred alignment.
+      unsigned PrefAlign = ABIAlign;
+      if (!Rest.empty()) {
+        Split = split(Rest, ':');
+        PrefAlign = inBytes(getInt(Tok));
       }
-      unsigned PrefAlign = PrefAlignBits / 8;
-      if (PrefAlign == 0)
-        PrefAlign = ABIAlign;
+
       setAlignment(AlignType, ABIAlign, PrefAlign, Size);
 
       break;
     }
     case 'n':  // Native integer types.
-      Specifier = Specifier.substr(1);
-      do {
-        int Width = getInt(Specifier);
-        if (Width <= 0) {
-          return std::string("invalid native integer size \'") +
-            Specifier.str() + "\', must be a positive integer.";
-        }
-        if (Width != 0)
-          LegalIntWidths.push_back(Width);
-        Split = Token.split(':');
-        Specifier = Split.first;
-        Token = Split.second;
-      } while (!Specifier.empty() || !Token.empty());
+      for (;;) {
+        unsigned Width = getInt(Tok);
+        assert(Width != 0 && "width must be non-zero");
+        LegalIntWidths.push_back(Width);
+        if (Rest.empty())
+          break;
+        Split = split(Rest, ':');
+      }
       break;
     case 'S': { // Stack natural alignment.
-      int StackNaturalAlignBits = getInt(Specifier.substr(1));
-      if (StackNaturalAlignBits < 0 || StackNaturalAlignBits % 8 != 0) {
-        return "invalid natural stack alignment (S-field), "
-               "must be a positive 8-bit multiple";
-      }
-      StackNaturalAlign = StackNaturalAlignBits / 8;
+      StackNaturalAlign = inBytes(getInt(Tok));
       break;
     }
     default:
+      llvm_unreachable("Unknown specifier in datalayout string");
       break;
     }
   }
-
-  return "";
 }
 
 /// Default ctor.
diff --git a/lib/VMCore/DebugInfo.cpp b/lib/VMCore/DebugInfo.cpp
index 5eea2ce9e0..07508c879e 100644
--- a/lib/VMCore/DebugInfo.cpp
+++ b/lib/VMCore/DebugInfo.cpp
@@ -13,16 +13,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
 #include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Intrinsics.h"
 #include "llvm/Module.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/raw_ostream.h"
@@ -75,6 +75,18 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
   return 0;
 }
 
+int64_t DIDescriptor::getInt64Field(unsigned Elt) const {
+  if (DbgNode == 0)
+    return 0;
+
+  if (Elt < DbgNode->getNumOperands())
+    if (ConstantInt *CI
+        = dyn_cast_or_null<ConstantInt>(DbgNode->getOperand(Elt)))
+      return CI->getSExtValue();
+
+  return 0;
+}
+
 DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
   if (DbgNode == 0)
     return DIDescriptor();
@@ -659,8 +671,9 @@ DIArray DICompileUnit::getSubprograms() const {
     return DIArray();
 
   if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(12)))
-    if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
-      return DIArray(A);
+    if (N->getNumOperands() > 0)
+      if (MDNode *A = dyn_cast_or_null<MDNode>(N->getOperand(0)))
+        return DIArray(A);
   return DIArray();
 }
 
@@ -1036,7 +1049,11 @@ void DIDescriptor::print(raw_ostream &OS) const {
 }
 
 void DISubrange::printInternal(raw_ostream &OS) const {
-  OS << " [" << getLo() << ", " << getHi() << ']';
+  int64_t Count = getCount();
+  if (Count != -1)
+    OS << " [" << getLo() << ", " << Count - 1 << ']';
+  else
+    OS << " [unbounded]";
 }
 
 void DIScope::printInternal(raw_ostream &OS) const {
diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp
index c6a30536e6..c57b5a3053 100644
--- a/lib/VMCore/DebugLoc.cpp
+++ b/lib/VMCore/DebugLoc.cpp
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/DebugLoc.h"
-#include "llvm/DebugInfo.h"
-#include "llvm/ADT/DenseMapInfo.h"
 #include "LLVMContextImpl.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/DebugInfo.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
index 77b2403d87..3fe840f67c 100644
--- a/lib/VMCore/Dominators.cpp
+++ b/lib/VMCore/Dominators.cpp
@@ -15,17 +15,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/DominatorInternals.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/Instructions.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 9c4f2d9399..9b2046bb12 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -11,22 +11,23 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Module.h"
+#include "llvm/Function.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/StringPool.h"
 #include "llvm/Support/RWMutex.h"
+#include "llvm/Support/StringPool.h"
 #include "llvm/Support/Threading.h"
-#include "SymbolTableListTraitsImpl.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
 using namespace llvm;
 
 // Explicit instantiations of SymbolTableListTraits since some of the methods
@@ -248,13 +249,13 @@ void Function::dropAllReferences() {
 }
 
 void Function::addAttribute(unsigned i, Attributes attr) {
-  AttrListPtr PAL = getAttributes();
+  AttributeSet PAL = getAttributes();
   PAL = PAL.addAttr(getContext(), i, attr);
   setAttributes(PAL);
 }
 
 void Function::removeAttribute(unsigned i, Attributes attr) {
-  AttrListPtr PAL = getAttributes();
+  AttributeSet PAL = getAttributes();
   PAL = PAL.removeAttr(getContext(), i, attr);
   setAttributes(PAL);
 }
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index ad7a872b1f..467e53a3a3 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -12,12 +12,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/GlobalValue.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Constants.h"
-#include "llvm/GlobalVariable.h"
-#include "llvm/GlobalAlias.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/Module.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/LeakDetector.h"
 using namespace llvm;
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index c0d1feeb9a..81ee4cee9a 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -12,12 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Instruction.h"
-#include "llvm/Type.h"
-#include "llvm/Instructions.h"
 #include "llvm/Constants.h"
+#include "llvm/Instructions.h"
 #include "llvm/Module.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/LeakDetector.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
@@ -92,6 +93,95 @@ void Instruction::moveBefore(Instruction *MovePos) {
                                              this);
 }
 
+/// Set or clear the unsafe-algebra flag on this instruction, which must be an
+/// operator which supports this flag. See LangRef.html for the meaning of this
+/// flag.
+void Instruction::setHasUnsafeAlgebra(bool B) {
+  assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+  cast<FPMathOperator>(this)->setHasUnsafeAlgebra(B);
+}
+
+/// Set or clear the NoNaNs flag on this instruction, which must be an operator
+/// which supports this flag. See LangRef.html for the meaning of this flag.
+void Instruction::setHasNoNaNs(bool B) {
+  assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+  cast<FPMathOperator>(this)->setHasNoNaNs(B);
+}
+
+/// Set or clear the no-infs flag on this instruction, which must be an operator
+/// which supports this flag. See LangRef.html for the meaning of this flag.
+void Instruction::setHasNoInfs(bool B) {
+  assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+  cast<FPMathOperator>(this)->setHasNoInfs(B);
+}
+
+/// Set or clear the no-signed-zeros flag on this instruction, which must be an
+/// operator which supports this flag. See LangRef.html for the meaning of this
+/// flag.
+void Instruction::setHasNoSignedZeros(bool B) {
+  assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+  cast<FPMathOperator>(this)->setHasNoSignedZeros(B);
+}
+
+/// Set or clear the allow-reciprocal flag on this instruction, which must be an
+/// operator which supports this flag. See LangRef.html for the meaning of this
+/// flag.
+void Instruction::setHasAllowReciprocal(bool B) {
+  assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+  cast<FPMathOperator>(this)->setHasAllowReciprocal(B);
+}
+
+/// Convenience function for setting all the fast-math flags on this
+/// instruction, which must be an operator which supports these flags. See
+/// LangRef.html for the meaning of these flats.
+void Instruction::setFastMathFlags(FastMathFlags FMF) {
+  assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+  cast<FPMathOperator>(this)->setFastMathFlags(FMF);
+}
+
+/// Determine whether the unsafe-algebra flag is set.
+bool Instruction::hasUnsafeAlgebra() const {
+  assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+  return cast<FPMathOperator>(this)->hasUnsafeAlgebra();
+}
+
+/// Determine whether the no-NaNs flag is set.
+bool Instruction::hasNoNaNs() const {
+  assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+  return cast<FPMathOperator>(this)->hasNoNaNs();
+}
+
+/// Determine whether the no-infs flag is set.
+bool Instruction::hasNoInfs() const {
+  assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+  return cast<FPMathOperator>(this)->hasNoInfs();
+}
+
+/// Determine whether the no-signed-zeros flag is set.
+bool Instruction::hasNoSignedZeros() const {
+  assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+  return cast<FPMathOperator>(this)->hasNoSignedZeros();
+}
+
+/// Determine whether the allow-reciprocal flag is set.
+bool Instruction::hasAllowReciprocal() const {
+  assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+  return cast<FPMathOperator>(this)->hasAllowReciprocal();
+}
+
+/// Convenience function for getting all the fast-math flags, which must be an
+/// operator which supports these flags. See LangRef.html for the meaning of
+/// these flats.
+FastMathFlags Instruction::getFastMathFlags() const {
+  assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
+  return cast<FPMathOperator>(this)->getFastMathFlags();
+}
+
+/// Copy I's fast-math flags
+void Instruction::copyFastMathFlags(const Instruction *I) {
+  setFastMathFlags(I->getFastMathFlags());
+}
+
 
 const char *Instruction::getOpcodeName(unsigned OpCode) {
   switch (OpCode) {
@@ -384,6 +474,20 @@ bool Instruction::isAssociative(unsigned Opcode) {
          Opcode == Add || Opcode == Mul;
 }
 
+bool Instruction::isAssociative() const {
+  unsigned Opcode = getOpcode();
+  if (isAssociative(Opcode))
+    return true;
+
+  switch (Opcode) {
+  case FMul:
+  case FAdd:
+    return cast<FPMathOperator>(this)->hasUnsafeAlgebra();
+  default:
+    return false;
+  }
+}
+
 /// isCommutative - Return true if the instruction is commutative:
 ///
 ///   Commutative operators satisfy: (x op y) === (y op x)
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index ca7cd4eb97..ded95349d4 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -12,16 +12,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Instructions.h"
 #include "LLVMContextImpl.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
-#include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Operator.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 using namespace llvm;
 
@@ -331,23 +331,23 @@ CallInst::CallInst(const CallInst &CI)
 }
 
 void CallInst::addAttribute(unsigned i, Attributes attr) {
-  AttrListPtr PAL = getAttributes();
+  AttributeSet PAL = getAttributes();
   PAL = PAL.addAttr(getContext(), i, attr);
   setAttributes(PAL);
 }
 
 void CallInst::removeAttribute(unsigned i, Attributes attr) {
-  AttrListPtr PAL = getAttributes();
+  AttributeSet PAL = getAttributes();
   PAL = PAL.removeAttr(getContext(), i, attr);
   setAttributes(PAL);
 }
 
 bool CallInst::hasFnAttr(Attributes::AttrVal A) const {
-  if (AttributeList.getParamAttributes(AttrListPtr::FunctionIndex)
+  if (AttributeList.getParamAttributes(AttributeSet::FunctionIndex)
       .hasAttribute(A))
     return true;
   if (const Function *F = getCalledFunction())
-    return F->getParamAttributes(AttrListPtr::FunctionIndex).hasAttribute(A);
+    return F->getParamAttributes(AttributeSet::FunctionIndex).hasAttribute(A);
   return false;
 }
 
@@ -572,11 +572,11 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
 }
 
 bool InvokeInst::hasFnAttr(Attributes::AttrVal A) const {
-  if (AttributeList.getParamAttributes(AttrListPtr::FunctionIndex).
+  if (AttributeList.getParamAttributes(AttributeSet::FunctionIndex).
       hasAttribute(A))
     return true;
   if (const Function *F = getCalledFunction())
-    return F->getParamAttributes(AttrListPtr::FunctionIndex).hasAttribute(A);
+    return F->getParamAttributes(AttributeSet::FunctionIndex).hasAttribute(A);
   return false;
 }
 
@@ -589,13 +589,13 @@ bool InvokeInst::paramHasAttr(unsigned i, Attributes::AttrVal A) const {
 }
 
 void InvokeInst::addAttribute(unsigned i, Attributes attr) {
-  AttrListPtr PAL = getAttributes();
+  AttributeSet PAL = getAttributes();
   PAL = PAL.addAttr(getContext(), i, attr);
   setAttributes(PAL);
 }
 
 void InvokeInst::removeAttribute(unsigned i, Attributes attr) {
-  AttrListPtr PAL = getAttributes();
+  AttributeSet PAL = getAttributes();
   PAL = PAL.removeAttr(getContext(), i, attr);
   setAttributes(PAL);
 }
diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp
index 2446ec996d..68c4a766f6 100644
--- a/lib/VMCore/LLVMContext.cpp
+++ b/lib/VMCore/LLVMContext.cpp
@@ -13,12 +13,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
+#include "LLVMContextImpl.h"
 #include "llvm/Constants.h"
 #include "llvm/Instruction.h"
+#include "llvm/Metadata.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/SourceMgr.h"
-#include "LLVMContextImpl.h"
 #include <cctype>
 using namespace llvm;
 
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index d35d2844b8..53140a2bd9 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -12,9 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "LLVMContextImpl.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Attributes.h"
 #include "llvm/Module.h"
-#include "llvm/ADT/STLExtras.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index d4c28b435a..aafbfc49e9 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -15,22 +15,22 @@
 #ifndef LLVM_LLVMCONTEXT_IMPL_H
 #define LLVM_LLVMCONTEXT_IMPL_H
 
-#include "llvm/LLVMContext.h"
 #include "AttributesImpl.h"
 #include "ConstantsContext.h"
 #include "LeaksContext.h"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Metadata.h"
-#include "llvm/Support/ValueHandle.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Hashing.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Metadata.h"
+#include "llvm/Support/ValueHandle.h"
 #include <vector>
 
 namespace llvm {
diff --git a/lib/VMCore/LeakDetector.cpp b/lib/VMCore/LeakDetector.cpp
index f6651e93e2..7ffc0491b6 100644
--- a/lib/VMCore/LeakDetector.cpp
+++ b/lib/VMCore/LeakDetector.cpp
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "LLVMContextImpl.h"
 #include "llvm/Support/LeakDetector.h"
+#include "LLVMContextImpl.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ManagedStatic.h"
diff --git a/lib/VMCore/LeaksContext.h b/lib/VMCore/LeaksContext.h
index b9e59d46b7..faf3e952c8 100644
--- a/lib/VMCore/LeaksContext.h
+++ b/lib/VMCore/LeaksContext.h
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Value.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Value.h"
 
 namespace llvm {
 
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 95e5a8b2f9..91fa1a5931 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -13,14 +13,14 @@
 
 #include "llvm/Metadata.h"
 #include "LLVMContextImpl.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Instruction.h"
+#include "SymbolTableListTraitsImpl.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
-#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Instruction.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/ValueHandle.h"
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index a6e335c10c..e6e7d99afb 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -12,18 +12,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Module.h"
-#include "llvm/InstrTypes.h"
+#include "SymbolTableListTraitsImpl.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GVMaterializer.h"
+#include "llvm/InstrTypes.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/ErrorHandling.h" // @LOCALMOD
-#include "SymbolTableListTraitsImpl.h"
 #include <algorithm>
 #include <cstdarg>
 #include <cstdlib>
@@ -56,7 +56,6 @@ Module::~Module() {
   GlobalList.clear();
   FunctionList.clear();
   AliasList.clear();
-  LibraryList.clear();
   NamedMDList.clear();
   delete ValSymTab;
   delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab);
@@ -139,7 +138,7 @@ void Module::getMDKindNames(SmallVectorImpl<StringRef> &Result) const {
 //
 Constant *Module::getOrInsertFunction(StringRef Name,
                                       FunctionType *Ty,
-                                      AttrListPtr AttributeList) {
+                                      AttributeSet AttributeList) {
   // See if we have a definition for the specified function already.
   GlobalValue *F = getNamedValue(Name);
   if (F == 0) {
@@ -172,7 +171,7 @@ Constant *Module::getOrInsertFunction(StringRef Name,
 
 Constant *Module::getOrInsertTargetIntrinsic(StringRef Name,
                                              FunctionType *Ty,
-                                             AttrListPtr AttributeList) {
+                                             AttributeSet AttributeList) {
   // See if we have a definition for the specified function already.
   GlobalValue *F = getNamedValue(Name);
   if (F == 0) {
@@ -189,7 +188,7 @@ Constant *Module::getOrInsertTargetIntrinsic(StringRef Name,
 
 Constant *Module::getOrInsertFunction(StringRef Name,
                                       FunctionType *Ty) {
-  return getOrInsertFunction(Name, Ty, AttrListPtr());
+  return getOrInsertFunction(Name, Ty, AttributeSet());
 }
 
 // getOrInsertFunction - Look up the specified function in the module symbol
@@ -198,7 +197,7 @@ Constant *Module::getOrInsertFunction(StringRef Name,
 // arguments, which makes it easier for clients to use.
 //
 Constant *Module::getOrInsertFunction(StringRef Name,
-                                      AttrListPtr AttributeList,
+                                      AttributeSet AttributeList,
                                       Type *RetTy, ...) {
   va_list Args;
   va_start(Args, RetTy);
@@ -231,7 +230,7 @@ Constant *Module::getOrInsertFunction(StringRef Name,
   // Build the function type and chain to the other getOrInsertFunction...
   return getOrInsertFunction(Name,
                              FunctionType::get(RetTy, ArgTys, false),
-                             AttrListPtr());
+                             AttributeSet());
 }
 
 // getFunction - Look up the specified function in the module symbol table.
@@ -452,23 +451,6 @@ void Module::dropAllReferences() {
     I->dropAllReferences();
 }
 
-void Module::addLibrary(StringRef Lib) {
-  for (Module::lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I)
-    if (*I == Lib)
-      return;
-  LibraryList.push_back(Lib);
-}
-
-void Module::removeLibrary(StringRef Lib) {
-  LibraryListType::iterator I = LibraryList.begin();
-  LibraryListType::iterator E = LibraryList.end();
-  for (;I != E; ++I)
-    if (*I == Lib) {
-      LibraryList.erase(I);
-      return;
-    }
-}
-
 
 // @LOCALMOD-BEGIN
 // TODO(pdox):
@@ -577,11 +559,12 @@ Module::dumpMeta(raw_ostream &OS) const {
   }
   OS << "\n";
   OS << "SOName: " << getSOName() << "\n";
+  /* Commented out until we put lib_iterator back
   for (Module::lib_iterator L = lib_begin(),
                             E = lib_end();
        L != E; ++L) {
     OS << "NeedsLibrary: " << (*L) << "\n";
-  }
+    }*/
   std::vector<NeededRecord> NList;
   getNeededRecords(&NList);
   for (unsigned i = 0; i < NList.size(); ++i) {
@@ -639,10 +622,12 @@ static void getNeededRecordFor(const Module *M,
 // Place the complete list of needed records in NeededOut.
 void Module::getNeededRecords(std::vector<NeededRecord> *NeededOut) const {
   // Iterate through the libraries needed, grabbing each NeededRecord.
+  /* commented out until we pub lib_iterator back
   for (lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I) {
     NeededRecord NR;
     getNeededRecordFor(this, *I, &NR);
     NeededOut->push_back(NR);
   }
+  */
 }
-// @LOCALMOD-END
-\ No newline at end of file
+// @LOCALMOD-END
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index 994a7ffcee..ec448e6420 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -14,8 +14,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
 #include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/PassRegistry.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/PassNameParser.h"
 #include "llvm/Support/raw_ostream.h"
@@ -133,16 +133,6 @@ Pass *FunctionPass::createPrinterPass(raw_ostream &O,
   return createPrintFunctionPass(Banner, &O);
 }
 
-bool FunctionPass::doInitialization(Module &) {
-  // By default, don't do anything.
-  return false;
-}
-
-bool FunctionPass::doFinalization(Module &) {
-  // By default, don't do anything.
-  return false;
-}
-
 PassManagerType FunctionPass::getPotentialPassManagerType() const {
   return PMT_FunctionPassManager;
 }
@@ -157,11 +147,6 @@ Pass *BasicBlockPass::createPrinterPass(raw_ostream &O,
   llvm_unreachable("BasicBlockPass printing unsupported.");
 }
 
-bool BasicBlockPass::doInitialization(Module &) {
-  // By default, don't do anything.
-  return false;
-}
-
 bool BasicBlockPass::doInitialization(Function &) {
   // By default, don't do anything.
   return false;
@@ -172,11 +157,6 @@ bool BasicBlockPass::doFinalization(Function &) {
   return false;
 }
 
-bool BasicBlockPass::doFinalization(Module &) {
-  // By default, don't do anything.
-  return false;
-}
-
 PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
   return PMT_BasicBlockPassManager;
 }
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index 3a8a9e25e3..712b877501 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -13,18 +13,18 @@
 
 
 #include "llvm/PassManagers.h"
-#include "llvm/PassManager.h"
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/Assembly/Writer.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Timer.h"
-#include "llvm/Module.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
 #include "llvm/Support/PassNameParser.h"
+#include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Mutex.h"
 #include <algorithm>
 #include <map>
 using namespace llvm;
@@ -309,6 +309,9 @@ public:
   /// whether any of the passes modifies the module, and if so, return true.
   bool runOnModule(Module &M);
 
+  using llvm::Pass::doInitialization;
+  using llvm::Pass::doFinalization;
+
   /// doInitialization - Run all of the initializers for the module passes.
   ///
   bool doInitialization();
@@ -402,6 +405,9 @@ public:
   /// whether any of the passes modifies the module, and if so, return true.
   bool run(Module &M);
 
+  using llvm::Pass::doInitialization;
+  using llvm::Pass::doFinalization;
+
   /// doInitialization - Run all of the initializers for the module passes.
   ///
   bool doInitialization();
@@ -1327,7 +1333,7 @@ bool BBPassManager::doInitialization(Module &M) {
 bool BBPassManager::doFinalization(Module &M) {
   bool Changed = false;
 
-  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+  for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
     Changed |= getContainedPass(Index)->doFinalization(M);
 
   return Changed;
@@ -1417,6 +1423,12 @@ bool FunctionPassManagerImpl::doInitialization(Module &M) {
   dumpArguments();
   dumpPasses();
 
+  SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+  for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+       E = IPV.end(); I != E; ++I) {
+    Changed |= (*I)->doInitialization(M);
+  }
+
   for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
     Changed |= getContainedManager(Index)->doInitialization(M);
 
@@ -1426,9 +1438,15 @@ bool FunctionPassManagerImpl::doInitialization(Module &M) {
 bool FunctionPassManagerImpl::doFinalization(Module &M) {
   bool Changed = false;
 
-  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+  for (int Index = getNumContainedManagers() - 1; Index >= 0; --Index)
     Changed |= getContainedManager(Index)->doFinalization(M);
 
+  SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+  for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+       E = IPV.end(); I != E; ++I) {
+    Changed |= (*I)->doFinalization(M);
+  }
+
   return Changed;
 }
 
@@ -1528,12 +1546,12 @@ bool FPPassManager::runOnFunction(Function &F) {
 }
 
 bool FPPassManager::runOnModule(Module &M) {
-  bool Changed = doInitialization(M);
+  bool Changed = false;
 
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
     Changed |= runOnFunction(*I);
 
-  return doFinalization(M) || Changed;
+  return Changed;
 }
 
 bool FPPassManager::doInitialization(Module &M) {
@@ -1541,16 +1559,16 @@ bool FPPassManager::doInitialization(Module &M) {
 
   for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
     Changed |= getContainedPass(Index)->doInitialization(M);
-
+  
   return Changed;
 }
 
 bool FPPassManager::doFinalization(Module &M) {
   bool Changed = false;
 
-  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+  for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
     Changed |= getContainedPass(Index)->doFinalization(M);
-
+  
   return Changed;
 }
 
@@ -1572,6 +1590,10 @@ MPPassManager::runOnModule(Module &M) {
     Changed |= FPP->doInitialization(M);
   }
 
+  // Initialize module passes
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+    Changed |= getContainedPass(Index)->doInitialization(M);
+
   for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
     ModulePass *MP = getContainedPass(Index);
     bool LocalChanged = false;
@@ -1600,6 +1622,10 @@ MPPassManager::runOnModule(Module &M) {
     removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG);
   }
 
+  // Finalize module passes
+  for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+    Changed |= getContainedPass(Index)->doFinalization(M);
+
   // Finalize on-the-fly passes
   for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
        I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
@@ -1610,29 +1636,7 @@ MPPassManager::runOnModule(Module &M) {
     FPP->releaseMemoryOnTheFly();
     Changed |= FPP->doFinalization(M);
   }
-
-  return Changed;
-}
-
-/// Run all of the initializers for the module passes.
-///
-bool MPPassManager::doInitialization() {
-  bool Changed = false;
-
-  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
-    Changed |= getContainedPass(Index)->doInitialization();
-
-  return Changed;
-}
-
-/// Run all of the finalizers for the module passes.
-///
-bool MPPassManager::doFinalization() {
-  bool Changed = false;
-
-  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
-    Changed |= getContainedPass(Index)->doFinalization();
-
+  
   return Changed;
 }
 
@@ -1654,18 +1658,6 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
 
     OnTheFlyManagers[P] = FPP;
   }
-
-  // If RequiredPass is an analysis pass and it is available then do not
-  // generate the analysis again. Stale analysis info should not be
-  // available at this point.
-  const PassInfo *PI =
-    PassRegistry::getPassRegistry()->getPassInfo(RequiredPass->getPassID());
-  if (PI && PI->isAnalysis() && 
-      FPP->getTopLevelManager()->findAnalysisPass(RequiredPass->getPassID())) {
-    delete RequiredPass;
-    return;
-  }
-
   FPP->add(RequiredPass);
 
   // Register P as the last user of RequiredPass.
@@ -1692,24 +1684,6 @@ Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
 //===----------------------------------------------------------------------===//
 // PassManagerImpl implementation
 
-bool PassManagerImpl::doInitialization() {
-  bool Changed = false;
-
-  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
-    Changed |= getContainedManager(Index)->doInitialization();
-
-  return Changed;
-}
-
-bool PassManagerImpl::doFinalization() {
-  bool Changed = false;
-
-  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
-    Changed |= getContainedManager(Index)->doFinalization();
-
-  return Changed;
-}
-
 //
 /// run - Execute all of the passes scheduled for execution.  Keep track of
 /// whether any of the passes modifies the module, and if so, return true.
@@ -1720,9 +1694,21 @@ bool PassManagerImpl::run(Module &M) {
   dumpArguments();
   dumpPasses();
 
+  SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+  for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+       E = IPV.end(); I != E; ++I) {
+    Changed |= (*I)->doInitialization(M);
+  }
+
   initializeAllAnalysisInfo();
   for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
     Changed |= getContainedManager(Index)->runOnModule(M);
+
+  for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+       E = IPV.end(); I != E; ++I) {
+    Changed |= (*I)->doFinalization(M);
+  }
+
   return Changed;
 }
 
@@ -1754,18 +1740,6 @@ bool PassManager::run(Module &M) {
   return PM->run(M);
 }
 
-/// doInitialization - Run all of the initializers for the module passes.
-///
-bool PassManager::doInitialization() {
-  return PM->doInitialization();
-}
-
-/// doFinalization - Run all of the finalizers for the module passes.
-///
-bool PassManager::doFinalization() {
-  return PM->doFinalization();
-}
-
 //===----------------------------------------------------------------------===//
 // TimingInfo Class - This class is used to calculate information about the
 // amount of time each pass takes to execute.  This only happens with
diff --git a/lib/VMCore/PassRegistry.cpp b/lib/VMCore/PassRegistry.cpp
index 2df65572c5..5d2287927c 100644
--- a/lib/VMCore/PassRegistry.cpp
+++ b/lib/VMCore/PassRegistry.cpp
@@ -13,14 +13,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/PassRegistry.h"
-#include "llvm/PassSupport.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Mutex.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Function.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Mutex.h"
 #include <vector>
 
 using namespace llvm;
diff --git a/lib/VMCore/PrintModulePass.cpp b/lib/VMCore/PrintModulePass.cpp
index 1f1fbc91bc..34b2918c3d 100644
--- a/lib/VMCore/PrintModulePass.cpp
+++ b/lib/VMCore/PrintModulePass.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Assembly/PrintModulePass.h"
-
 #include "llvm/Function.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 4d75a7e060..3c3058288c 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -11,11 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Type.h"
 #include "LLVMContextImpl.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/Module.h"
 #include <algorithm>
 #include <cstdarg>
-#include "llvm/ADT/SmallString.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/VMCore/TypeFinder.cpp b/lib/VMCore/TypeFinder.cpp
index 4de649fb3f..56db645ebc 100644
--- a/lib/VMCore/TypeFinder.cpp
+++ b/lib/VMCore/TypeFinder.cpp
@@ -12,12 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/TypeFinder.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/Metadata.h"
 #include "llvm/Module.h"
-#include "llvm/ADT/SmallVector.h"
 using namespace llvm;
 
 void TypeFinder::run(const Module &M, bool onlyNamed) {
diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp
index e847ce6ee5..05b10b5329 100644
--- a/lib/VMCore/User.cpp
+++ b/lib/VMCore/User.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/User.h"
 #include "llvm/Constant.h"
 #include "llvm/GlobalValue.h"
-#include "llvm/User.h"
 #include "llvm/Operator.h"
 
 namespace llvm {
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 8d0720dc12..b10e093c15 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -11,23 +11,24 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Value.h"
 #include "LLVMContextImpl.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/Constant.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/InstrTypes.h"
 #include "llvm/Instructions.h"
-#include "llvm/Operator.h"
 #include "llvm/Module.h"
-#include "llvm/ValueSymbolTable.h"
-#include "llvm/ADT/SmallString.h"
+#include "llvm/Operator.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
 #include "llvm/Support/LeakDetector.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/ValueHandle.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ValueSymbolTable.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
index f1c970361a..8707daac30 100644
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -12,12 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "valuesymtab"
-#include "llvm/GlobalValue.h"
-#include "llvm/Type.h"
 #include "llvm/ValueSymbolTable.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/GlobalValue.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 // Class destructor
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index 2ee9f0f4c9..bbc971c6d2 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Type.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 EVT EVT::changeExtendedVectorElementTypeToInteger() const {
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 3782957f3b..0c9493eb7d 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -46,29 +46,29 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/Verifier.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/CallingConv.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/InlineAsm.h"
+#include "llvm/InstVisitor.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Metadata.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/PassManager.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Support/CallSite.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
@@ -299,7 +299,7 @@ namespace {
                              SmallVectorImpl<Type*> &ArgTys);
     void VerifyParameterAttrs(Attributes Attrs, Type *Ty,
                               bool isReturnValue, const Value *V);
-    void VerifyFunctionAttrs(FunctionType *FT, const AttrListPtr &Attrs,
+    void VerifyFunctionAttrs(FunctionType *FT, const AttributeSet &Attrs,
                              const Value *V);
 
     void WriteValue(const Value *V) {
@@ -585,7 +585,7 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty,
 // VerifyFunctionAttrs - Check parameter attributes against a function type.
 // The value V is printed in error messages.
 void Verifier::VerifyFunctionAttrs(FunctionType *FT,
-                                   const AttrListPtr &Attrs,
+                                   const AttributeSet &Attrs,
                                    const Value *V) {
   if (Attrs.isEmpty())
     return;
@@ -651,7 +651,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT,
           "'noinline and alwaysinline' are incompatible!", V);
 }
 
-static bool VerifyAttributeCount(const AttrListPtr &Attrs, unsigned Params) {
+static bool VerifyAttributeCount(const AttributeSet &Attrs, unsigned Params) {
   if (Attrs.isEmpty())
     return true;
 
@@ -687,7 +687,7 @@ void Verifier::visitFunction(Function &F) {
   Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
           "Invalid struct return type!", &F);
 
-  const AttrListPtr &Attrs = F.getAttributes();
+  const AttributeSet &Attrs = F.getAttributes();
 
   Assert1(VerifyAttributeCount(Attrs, FT->getNumParams()),
           "Attributes after last parameter!", &F);
@@ -1200,7 +1200,7 @@ void Verifier::VerifyCallSite(CallSite CS) {
             "Call parameter type does not match function signature!",
             CS.getArgument(i), FTy->getParamType(i), I);
 
-  const AttrListPtr &Attrs = CS.getAttributes();
+  const AttributeSet &Attrs = CS.getAttributes();
 
   Assert1(VerifyAttributeCount(Attrs, CS.arg_size()),
           "Attributes after last parameter!", I);
diff --git a/projects/sample/Makefile.llvm.rules b/projects/sample/Makefile.llvm.rules
index 7ed1c1b4ed..89b9e56ef4 100644
--- a/projects/sample/Makefile.llvm.rules
+++ b/projects/sample/Makefile.llvm.rules
@@ -250,6 +250,15 @@ ifeq ($(ENABLE_LIBCPP),1)
   LD.Flags +=  -stdlib=libc++
 endif
 
+ifeq ($(ENABLE_CXX11),1)
+  CXX.Flags += -std=c++11
+endif
+
+ifeq ($(ENABLE_WERROR),1)
+  CXX.Flags += -Werror
+  C.Flags += -Werror
+endif
+
 ifeq ($(ENABLE_PROFILING),1)
   BuildMode := $(BuildMode)+Profile
   CXX.Flags := $(filter-out -fomit-frame-pointer,$(CXX.Flags)) -pg -g
diff --git a/projects/sample/tools/sample/main.c b/projects/sample/tools/sample/main.c
index 2880265f84..ec0c3df605 100644
--- a/projects/sample/tools/sample/main.c
+++ b/projects/sample/tools/sample/main.c
@@ -1,10 +1,8 @@
+#include "sample.h"
 #include <stdio.h>
 #include <stdlib.h>
-
 #include <unistd.h>
 
-#include "sample.h"
-
 int
 main (int argc, char ** argv)
 {
diff --git a/test/Analysis/BasicAA/phi-speculation.ll b/test/Analysis/BasicAA/phi-speculation.ll
index 21c6592986..5e1e118d98 100644
--- a/test/Analysis/BasicAA/phi-speculation.ll
+++ b/test/Analysis/BasicAA/phi-speculation.ll
@@ -4,9 +4,9 @@ target datalayout =
 ; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
 
 ; ptr_phi and ptr2_phi do not alias.
+; CHECK: test_noalias_1
 ; CHECK: NoAlias: i32* %ptr2_phi, i32* %ptr_phi
-
-define i32 @test_noalias(i32* %ptr2, i32 %count, i32* %coeff) {
+define i32 @test_noalias_1(i32* %ptr2, i32 %count, i32* %coeff) {
 entry:
   %ptr = getelementptr inbounds i32* %ptr2, i64 1
   br label %while.body
@@ -31,3 +31,64 @@ while.body:
 the_exit:
   ret i32 %add
 }
+
+; CHECK: test_noalias_2
+; CHECK: NoAlias: i32* %ptr_outer_phi, i32* %ptr_outer_phi2
+; CHECK: NoAlias: i32* %ptr2_phi, i32* %ptr_phi
+define i32 @test_noalias_2(i32* %ptr2, i32 %count, i32* %coeff) {
+entry:
+  %ptr = getelementptr inbounds i32* %ptr2, i64 1
+  br label %outer.while.header
+
+outer.while.header:
+  %ptr_outer_phi = phi i32* [%ptr_inc_outer, %outer.while.backedge], [ %ptr, %entry]
+  %ptr_outer_phi2 = phi i32* [%ptr2_inc_outer, %outer.while.backedge], [ %ptr2, %entry]
+  %num.outer = phi i32 [ %count, %entry ], [ %dec.outer, %outer.while.backedge ]
+  br label %while.body
+
+while.body:
+  %num = phi i32 [ %count, %outer.while.header ], [ %dec, %while.body ]
+  %ptr_phi = phi i32* [ %ptr_outer_phi, %outer.while.header ], [ %ptr_inc, %while.body ]
+  %ptr2_phi = phi i32* [ %ptr_outer_phi2, %outer.while.header ], [ %ptr2_inc, %while.body ]
+  %result.09 = phi i32 [ 0 , %outer.while.header ], [ %add, %while.body ]
+  %dec = add nsw i32 %num, -1
+  %0 = load i32* %ptr_phi, align 4
+  store i32 %0, i32* %ptr2_phi, align 4
+  %1 = load i32* %coeff, align 4
+  %2 = load i32* %ptr_phi, align 4
+  %mul = mul nsw i32 %1, %2
+  %add = add nsw i32 %mul, %result.09
+  %tobool = icmp eq i32 %dec, 0
+  %ptr_inc = getelementptr inbounds i32* %ptr_phi, i64 1
+  %ptr2_inc = getelementptr inbounds i32* %ptr2_phi, i64 1
+  br i1 %tobool, label %outer.while.backedge, label %while.body
+
+outer.while.backedge:
+  %ptr_inc_outer = getelementptr inbounds i32* %ptr_phi, i64 1
+  %ptr2_inc_outer = getelementptr inbounds i32* %ptr2_phi, i64 1
+  %dec.outer = add nsw i32 %num.outer, -1
+  %br.cond = icmp eq i32 %dec.outer, 0
+  br i1 %br.cond, label %the_exit, label %outer.while.header
+
+the_exit:
+  ret i32 %add
+}
+
+; CHECK: test_noalias_3
+; CHECK: MayAlias: i8* %ptr2_phi, i8* %ptr_phi
+define i32 @test_noalias_3(i8* noalias %x, i8* noalias %y, i8* noalias %z,
+                           i32 %count) {
+entry:
+  br label %while.body
+
+while.body:
+  %num = phi i32 [ %count, %entry ], [ %dec, %while.body ]
+  %ptr_phi = phi i8* [ %x, %entry ], [ %z, %while.body ]
+  %ptr2_phi = phi i8* [ %y, %entry ], [ %ptr_phi, %while.body ]
+  %dec = add nsw i32 %num, -1
+  %tobool = icmp eq i32 %dec, 0
+  br i1 %tobool, label %the_exit, label %while.body
+
+the_exit:
+  ret i32 1
+}
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll
index 37cca8d540..ae78d44aeb 100644
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -14,7 +14,7 @@ define i32 @add(i32 %arg) {
   %D = add <4 x i64> undef, undef
   ;CHECK: cost of 8 {{.*}} add
   %E = add <8 x i64> undef, undef
-  ;CHECK: cost of 1 {{.*}} ret
+  ;CHECK: cost of 0 {{.*}} ret
   ret i32 undef
 }
 
@@ -28,7 +28,7 @@ define i32 @xor(i32 %arg) {
   %C = xor <2 x i64> undef, undef
   ;CHECK: cost of 1 {{.*}} xor
   %D = xor <4 x i64> undef, undef
-  ;CHECK: cost of 1 {{.*}} ret
+  ;CHECK: cost of 0 {{.*}} ret
   ret i32 undef
 }
 
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll
index 75c97a781e..cedc682598 100644
--- a/test/Analysis/CostModel/X86/cast.ll
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -28,7 +28,7 @@ define i32 @add(i32 %arg) {
   ;CHECK: cost of 0 {{.*}} trunc
   %H = trunc i32 undef to i1
 
-  ;CHECK: cost of 1 {{.*}} ret
+  ;CHECK: cost of 0 {{.*}} ret
   ret i32 undef
 }
 
diff --git a/test/Analysis/CostModel/X86/cmp.ll b/test/Analysis/CostModel/X86/cmp.ll
index f868bd18b5..90b09c1154 100644
--- a/test/Analysis/CostModel/X86/cmp.ll
+++ b/test/Analysis/CostModel/X86/cmp.ll
@@ -35,7 +35,7 @@ define i32 @cmp(i32 %arg) {
   ;CHECK: cost of 4 {{.*}} icmp
   %M = icmp eq <32 x i8> undef, undef
 
-  ;CHECK: cost of 1 {{.*}} ret
+  ;CHECK: cost of 0 {{.*}} ret
   ret i32 undef
 }
 
diff --git a/test/Analysis/CostModel/X86/i32.ll b/test/Analysis/CostModel/X86/i32.ll
index 4015e0b1ee..52c295934c 100644
--- a/test/Analysis/CostModel/X86/i32.ll
+++ b/test/Analysis/CostModel/X86/i32.ll
@@ -2,7 +2,7 @@
 
 
 ;CHECK: cost of 2 {{.*}} add
-;CHECK: cost of 1 {{.*}} ret
+;CHECK: cost of 0 {{.*}} ret
 define i32 @no_info(i32 %arg) {
   %e = add i64 undef, undef
   ret i32 undef
diff --git a/test/Analysis/CostModel/X86/tiny.ll b/test/Analysis/CostModel/X86/tiny.ll
index cc7b443a7d..0dafdadb5b 100644
--- a/test/Analysis/CostModel/X86/tiny.ll
+++ b/test/Analysis/CostModel/X86/tiny.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
 target triple = "x86_64-apple-macosx10.8.0"
 
 ;CHECK: cost of 1 {{.*}} add
-;CHECK: cost of 1 {{.*}} ret
+;CHECK: cost of 0 {{.*}} ret
 define i32 @no_info(i32 %arg) {
   %e = add i32 %arg, %arg
   ret i32 %e
diff --git a/test/Analysis/CostModel/X86/vectorized-loop.ll b/test/Analysis/CostModel/X86/vectorized-loop.ll
index 7919a9ca9a..6c9e111bb1 100644
--- a/test/Analysis/CostModel/X86/vectorized-loop.ll
+++ b/test/Analysis/CostModel/X86/vectorized-loop.ll
@@ -41,7 +41,7 @@ vector.body:                                      ; preds = %for.body.lr.ph, %ve
   store <8 x i32> %11, <8 x i32>* %9, align 4
   %index.next = add i64 %index, 8
   %12 = icmp eq i64 %index.next, %end.idx.rnd.down
-  ;CHECK: cost of 1 {{.*}} br
+  ;CHECK: cost of 0 {{.*}} br
   br i1 %12, label %middle.block, label %vector.body
 
 middle.block:                                     ; preds = %vector.body, %for.body.lr.ph
@@ -65,11 +65,11 @@ for.body:                                         ; preds = %middle.block, %for.
   ;CHECK: cost of 0 {{.*}} trunc
   %16 = trunc i64 %indvars.iv.next to i32
   %cmp = icmp slt i32 %16, %end
-  ;CHECK: cost of 1 {{.*}} br
+  ;CHECK: cost of 0 {{.*}} br
   br i1 %cmp, label %for.body, label %for.end
 
 for.end:                                          ; preds = %middle.block, %for.body, %entry
-  ;CHECK: cost of 1 {{.*}} ret
+  ;CHECK: cost of 0 {{.*}} ret
   ret i32 undef
 }
 
diff --git a/test/Analysis/DependenceAnalysis/Banerjee.ll b/test/Analysis/DependenceAnalysis/Banerjee.ll
index bc6de425a3..003ee03ab0 100644
--- a/test/Analysis/DependenceAnalysis/Banerjee.ll
+++ b/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -14,12 +14,12 @@ define void @banerjee0(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [<= <>]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
@@ -65,12 +65,12 @@ entry:
   %cmp4 = icmp sgt i64 %n, 0
   br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end9
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [* <>]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - input [* *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 
 for.cond1.preheader.preheader:                    ; preds = %entry
   %0 = add i64 %n, 1
@@ -131,12 +131,12 @@ define void @banerjee2(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
@@ -181,12 +181,12 @@ define void @banerjee3(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [> >]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
@@ -231,12 +231,12 @@ define void @banerjee4(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
@@ -281,12 +281,12 @@ define void @banerjee5(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [< <]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
@@ -331,12 +331,12 @@ define void @banerjee6(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [=> <>]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
@@ -381,12 +381,12 @@ define void @banerjee7(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [> <=]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
@@ -431,12 +431,12 @@ define void @banerjee8(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [> <>]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
@@ -481,12 +481,12 @@ define void @banerjee9(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [<= =|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc8 ]
@@ -532,12 +532,12 @@ define void @banerjee10(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [<> =]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
@@ -582,12 +582,12 @@ define void @banerjee11(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [<= <>]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
@@ -632,12 +632,12 @@ define void @banerjee12(i64* %A, i64* %B, i64 %m, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [= <>]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i64* [ %B, %entry ], [ %scevgep, %for.inc7 ]
diff --git a/test/Analysis/DependenceAnalysis/Coupled.ll b/test/Analysis/DependenceAnalysis/Coupled.ll
index a5989fb88e..8c77849ae8 100644
--- a/test/Analysis/DependenceAnalysis/Coupled.ll
+++ b/test/Analysis/DependenceAnalysis/Coupled.ll
@@ -13,12 +13,12 @@ define void @couple0([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -49,12 +49,12 @@ define void @couple1([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [-9]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -85,12 +85,12 @@ define void @couple2([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [*|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -123,12 +123,12 @@ define void @couple3([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -161,12 +161,12 @@ define void @couple4([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [*|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -200,12 +200,12 @@ define void @couple5([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -241,12 +241,12 @@ define void @couple6([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [=|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -277,12 +277,12 @@ define void @couple7([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -313,12 +313,12 @@ define void @couple8([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -350,12 +350,12 @@ define void @couple9([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -387,13 +387,13 @@ define void @couple10([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [>] splitable!
 ; CHECK: da analyze - split level = 1, iteration = 3!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -425,13 +425,13 @@ define void @couple11([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [=|<] splitable!
 ; CHECK: da analyze - split level = 1, iteration = 9!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -463,13 +463,13 @@ define void @couple12([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [<] splitable!
 ; CHECK: da analyze - split level = 1, iteration = 11!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -501,12 +501,12 @@ define void @couple13([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -537,13 +537,13 @@ define void @couple14([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [=|<] splitable!
 ; CHECK: da analyze - split level = 1, iteration = 9!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -575,12 +575,12 @@ define void @couple15([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
diff --git a/test/Analysis/DependenceAnalysis/ExactRDIV.ll b/test/Analysis/DependenceAnalysis/ExactRDIV.ll
index 8120739cc5..81f55161c0 100644
--- a/test/Analysis/DependenceAnalysis/ExactRDIV.ll
+++ b/test/Analysis/DependenceAnalysis/ExactRDIV.ll
@@ -14,12 +14,12 @@ define void @rdiv0(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -62,12 +62,12 @@ define void @rdiv1(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -108,12 +108,12 @@ define void @rdiv2(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -154,12 +154,12 @@ define void @rdiv3(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -200,12 +200,12 @@ define void @rdiv4(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
-; CHECK: da analyze - flow!
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -246,12 +246,12 @@ define void @rdiv5(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -293,12 +293,12 @@ define void @rdiv6(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -340,12 +340,12 @@ define void @rdiv7(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -387,12 +387,12 @@ define void @rdiv8(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
-; CHECK: da analyze - flow!
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.03 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -434,12 +434,12 @@ define void @rdiv9(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S S|<]!
+; CHECK: da analyze - consistent input [S S]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc5
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc5 ]
@@ -483,12 +483,12 @@ define void @rdiv10(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S S|<]!
+; CHECK: da analyze - consistent input [S S]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc5
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc5 ]
@@ -531,12 +531,12 @@ define void @rdiv11(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S S|<]!
+; CHECK: da analyze - consistent input [S S]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc5
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc5 ]
@@ -579,12 +579,12 @@ define void @rdiv12(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [* *|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S S|<]!
+; CHECK: da analyze - consistent input [S S]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc5
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc5 ]
diff --git a/test/Analysis/DependenceAnalysis/ExactSIV.ll b/test/Analysis/DependenceAnalysis/ExactSIV.ll
index 7485034108..586bbe5096 100644
--- a/test/Analysis/DependenceAnalysis/ExactSIV.ll
+++ b/test/Analysis/DependenceAnalysis/ExactSIV.ll
@@ -13,12 +13,12 @@ define void @exact0(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [<=|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -50,12 +50,12 @@ define void @exact1(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -88,12 +88,12 @@ define void @exact2(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -124,12 +124,12 @@ define void @exact3(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [>]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -160,12 +160,12 @@ define void @exact4(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [>]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -196,12 +196,12 @@ define void @exact5(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [=>|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -232,12 +232,12 @@ define void @exact6(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [=>|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -268,12 +268,12 @@ define void @exact7(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [*|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -304,12 +304,12 @@ define void @exact8(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -340,12 +340,12 @@ define void @exact9(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [>]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -376,12 +376,12 @@ define void @exact10(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [>]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -412,12 +412,12 @@ define void @exact11(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [=>|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -448,12 +448,12 @@ define void @exact12(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [=>|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -484,12 +484,12 @@ define void @exact13(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [*|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
diff --git a/test/Analysis/DependenceAnalysis/GCD.ll b/test/Analysis/DependenceAnalysis/GCD.ll
index 6bad8ae9a1..a42212464f 100644
--- a/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/test/Analysis/DependenceAnalysis/GCD.ll
@@ -14,12 +14,12 @@ define void @gcd0(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [=> *|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - input [* *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc8 ]
@@ -66,12 +66,12 @@ define void @gcd1(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - input [* *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc9
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
@@ -119,12 +119,12 @@ define void @gcd2(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - input [* *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc9
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
@@ -172,12 +172,12 @@ define void @gcd3(i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [<> *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - input [* *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc7
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc7 ]
@@ -223,12 +223,12 @@ define void @gcd4(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - input [* *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc17
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ]
@@ -284,12 +284,12 @@ define void @gcd5(i32* %A, i32* %B, i64 %M, i64 %N) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [<> *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - input [* *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc17
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc17 ]
@@ -346,12 +346,12 @@ entry:
   %cmp4 = icmp sgt i64 %n, 0
   br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end12
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - input [* *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
@@ -417,12 +417,12 @@ entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [* *|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - input [* *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
@@ -500,12 +500,12 @@ entry:
   %cmp4 = icmp sgt i32 %n, 0
   br i1 %cmp4, label %for.cond1.preheader.preheader, label %for.end15
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - input [* *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
@@ -578,12 +578,12 @@ entry:
   %cmp4 = icmp eq i32 %n, 0
   br i1 %cmp4, label %for.end15, label %for.cond1.preheader.preheader
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - flow [* *|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [* *|<]!
+; CHECK: da analyze - input [* *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
diff --git a/test/Analysis/DependenceAnalysis/Preliminary.ll b/test/Analysis/DependenceAnalysis/Preliminary.ll
index 97589db300..f36b85a595 100644
--- a/test/Analysis/DependenceAnalysis/Preliminary.ll
+++ b/test/Analysis/DependenceAnalysis/Preliminary.ll
@@ -13,9 +13,9 @@ define i32 @p0(i32 %n, i32* %A, i32* %B) nounwind uwtable ssp {
 entry:
   store i32 %n, i32* %A, align 4
 
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input!
+; CHECK: da analyze - none!
 
   %arrayidx1 = getelementptr inbounds i32* %B, i64 1
   %0 = load i32* %arrayidx1, align 4
@@ -31,9 +31,9 @@ define i32 @p1(i32 %n, i32* noalias %A, i32* noalias %B) nounwind uwtable ssp {
 entry:
   store i32 %n, i32* %A, align 4
 
-; CHECK: da analyze - consistent output!
 ; CHECK: da analyze - none!
-; CHECK: da analyze - consistent input!
+; CHECK: da analyze - none!
+; CHECK: da analyze - none!
 
   %arrayidx1 = getelementptr inbounds i32* %B, i64 1
   %0 = load i32* %arrayidx1, align 4
@@ -54,12 +54,12 @@ entry:
   %cmp10 = icmp sgt i64 %n, 0
   br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
 
-; CHECK: da analyze - consistent output [0 0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [-3 -2]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0 0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [* * *|<]!
+; CHECK: da analyze - output [* * *]!
 
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
@@ -162,13 +162,13 @@ entry:
   %cmp44 = icmp sgt i64 %n, 0
   br i1 %cmp44, label %for.cond1.preheader.preheader, label %for.end90
 
-; CHECK: da analyze - output [0 0 0 0 0 S * * * * S S|<]!
+; CHECK: da analyze - output [0 0 0 0 0 S * * * * S S]!
 ; CHECK: da analyze - flow [-6 * * => * * * * * * * *] splitable!
 ; CHECK: da analyze - split level = 3, iteration = 1!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0 S 0 0 S 0 S S S S 0 0|<]!
+; CHECK: da analyze - consistent input [0 S 0 0 S 0 S S S S 0 0]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [* * * * * * * * * * * *|<]!
+; CHECK: da analyze - output [* * * * * * * * * * * *]!
 
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
@@ -425,12 +425,12 @@ entry:
   %cmp1 = icmp sgt i64 %n, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
 
-; CHECK: da analyze - output [*|<]!
+; CHECK: da analyze - output [*]!
 ; CHECK: da analyze - flow [*|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -471,12 +471,12 @@ entry:
   %cmp1 = icmp sgt i64 %n, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
 
-; CHECK: da analyze - output [*|<]!
+; CHECK: da analyze - output [*]!
 ; CHECK: da analyze - flow [*|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -517,12 +517,12 @@ entry:
   %cmp1 = icmp sgt i64 %n, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [2]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -559,12 +559,12 @@ entry:
   %idxprom = sext i8 %n to i64
   %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
 
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 
   store i32 0, i32* %arrayidx, align 4
   %conv = sext i8 %n to i64
@@ -586,12 +586,12 @@ entry:
   %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
   store i32 0, i32* %arrayidx, align 4
 
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 
   %conv = sext i16 %n to i64
   %add = add i64 %conv, 1
@@ -612,12 +612,12 @@ entry:
   %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
   store i32 0, i32* %arrayidx, align 4
 
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 
   %add = add nsw i32 %n, 1
   %idxprom1 = sext i32 %add to i64
@@ -638,12 +638,12 @@ entry:
   %arrayidx = getelementptr inbounds i32* %A, i64 %idxprom
   store i32 0, i32* %arrayidx, align 4
 
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 
   %add = add i32 %n, 1
   %idxprom1 = zext i32 %add to i64
@@ -672,9 +672,9 @@ entry:
   %cmp1 = icmp eq i64 %add.ptr.sum, 0
   br i1 %cmp1, label %while.end, label %while.body.preheader
 
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent anti [1]!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 while.body.preheader:                             ; preds = %entry
   br label %while.body
diff --git a/test/Analysis/DependenceAnalysis/Propagating.ll b/test/Analysis/DependenceAnalysis/Propagating.ll
index 32d253593f..f9034ede9d 100644
--- a/test/Analysis/DependenceAnalysis/Propagating.ll
+++ b/test/Analysis/DependenceAnalysis/Propagating.ll
@@ -14,12 +14,12 @@ define void @prop0([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [1 -1]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc9
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
@@ -64,12 +64,12 @@ define void @prop1([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ss
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - consistent output [0 0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [1 1 -1]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0 0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= = =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc18
   %B.addr.06 = phi i32* [ %B, %entry ], [ %scevgep7, %for.inc18 ]
@@ -126,12 +126,12 @@ define void @prop2([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - consistent output [0 S|<]!
+; CHECK: da analyze - consistent output [0 S]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc8
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc8 ]
@@ -176,12 +176,12 @@ define void @prop3([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc9
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc9 ]
@@ -227,12 +227,12 @@ define void @prop4([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [2 -3]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc11
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc11 ]
@@ -279,13 +279,13 @@ define void @prop5([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ss
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [< -16] splitable!
 ; CHECK: da analyze - split level = 1, iteration = 11!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc13
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc13 ]
@@ -333,12 +333,12 @@ define void @prop6([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [=> -2]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc12
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc12 ]
@@ -386,13 +386,13 @@ define void @prop7([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [* -38] splitable!
 ; CHECK: da analyze - split level = 1, iteration = 4!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc14
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc14 ]
@@ -442,12 +442,12 @@ define void @prop8([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - consistent output [S 0|<]!
+; CHECK: da analyze - consistent output [S 0]!
 ; CHECK: da analyze - flow [p<= 2]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc10
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc10 ]
@@ -493,12 +493,12 @@ define void @prop9([100 x i32]* %A, i32* %B, i32 %n) nounwind uwtable ssp {
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - consistent output [0 0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [p<= 2]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S 0|<]!
+; CHECK: da analyze - consistent input [S 0]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc10
   %B.addr.04 = phi i32* [ %B, %entry ], [ %scevgep, %for.inc10 ]
diff --git a/test/Analysis/DependenceAnalysis/Separability.ll b/test/Analysis/DependenceAnalysis/Separability.ll
index beda448e83..3dcaaec2ae 100644
--- a/test/Analysis/DependenceAnalysis/Separability.ll
+++ b/test/Analysis/DependenceAnalysis/Separability.ll
@@ -16,12 +16,12 @@ define void @sep0([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [0 * * S|<]!
+; CHECK: da analyze - output [0 * * S]!
 ; CHECK: da analyze - flow [-10 * * *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [0 * S *|<]!
+; CHECK: da analyze - input [0 * S *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= = = =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc22
   %B.addr.08 = phi i32* [ %B, %entry ], [ %scevgep11, %for.inc22 ]
@@ -91,12 +91,12 @@ define void @sep1([100 x [100 x i32]]* %A, i32* %B, i32 %n) nounwind uwtable ssp
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - output [0 * * S|<]!
+; CHECK: da analyze - output [0 * * S]!
 ; CHECK: da analyze - flow [> * * *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [0 * S *|<]!
+; CHECK: da analyze - input [0 * S *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= = = =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc22
   %B.addr.08 = phi i32* [ %B, %entry ], [ %scevgep11, %for.inc22 ]
@@ -165,12 +165,12 @@ define void @sep2([100 x [100 x [100 x i32]]]* %A, i32* %B, i32 %n) nounwind uwt
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - consistent output [0 S 0 0|<]!
+; CHECK: da analyze - consistent output [0 S 0 0]!
 ; CHECK: da analyze - flow [> * * -10]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [0 * * 0|<]!
+; CHECK: da analyze - input [0 * * 0]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= = = =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc26
   %B.addr.08 = phi i32* [ %B, %entry ], [ %scevgep11, %for.inc26 ]
@@ -239,12 +239,12 @@ define void @sep3([100 x [100 x [100 x i32]]]* %A, i32* %B, i32 %n) nounwind uwt
 entry:
   br label %for.cond1.preheader
 
-; CHECK: da analyze - consistent output [0 S 0 0|<]!
+; CHECK: da analyze - consistent output [0 S 0 0]!
 ; CHECK: da analyze - flow [> * * *]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - input [0 * * 0|<]!
+; CHECK: da analyze - input [0 * * 0]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [= = = =|<]!
+; CHECK: da analyze - none!
 
 for.cond1.preheader:                              ; preds = %entry, %for.inc27
   %B.addr.08 = phi i32* [ %B, %entry ], [ %scevgep11, %for.inc27 ]
diff --git a/test/Analysis/DependenceAnalysis/StrongSIV.ll b/test/Analysis/DependenceAnalysis/StrongSIV.ll
index 1cf00ad9c1..f499e84d48 100644
--- a/test/Analysis/DependenceAnalysis/StrongSIV.ll
+++ b/test/Analysis/DependenceAnalysis/StrongSIV.ll
@@ -14,12 +14,12 @@ entry:
   %cmp1 = icmp sgt i64 %n, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [2]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -56,12 +56,12 @@ entry:
   %cmp1 = icmp sgt i32 %n, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [2]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   %0 = sext i32 %n to i64
@@ -99,12 +99,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [2]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -141,12 +141,12 @@ entry:
   %cmp1 = icmp sgt i32 %n, 0
   br i1 %cmp1, label %for.body.preheader, label %for.end
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [2]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -183,12 +183,12 @@ define void @strong4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -218,12 +218,12 @@ define void @strong5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [19]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -253,12 +253,12 @@ define void @strong6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [3]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -290,12 +290,12 @@ define void @strong7(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -327,12 +327,12 @@ define void @strong8(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [%n|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -363,12 +363,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -406,12 +406,12 @@ define void @strong10(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - consistent flow [0|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
diff --git a/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
index 5565f64811..81e61892d8 100644
--- a/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
+++ b/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
@@ -15,12 +15,12 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -77,12 +77,12 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.cond2.preheader, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -141,12 +141,12 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -203,12 +203,12 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -263,12 +263,12 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -324,12 +324,12 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.cond1.preheader, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -385,12 +385,12 @@ entry:
   %cmp4 = icmp eq i64 %n1, 0
   br i1 %cmp4, label %for.end7, label %for.cond1.preheader.preheader
 
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S S|<]!
+; CHECK: da analyze - consistent input [S S]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - output [* *|<]!
+; CHECK: da analyze - output [* *]!
 
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
diff --git a/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
index 074cc56e9b..297096ce13 100644
--- a/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
+++ b/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -14,12 +14,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -59,12 +59,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -106,12 +106,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -151,12 +151,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -197,12 +197,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -242,12 +242,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -289,13 +289,13 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [*|<] splitable!
 ; CHECK: da analyze - split level = 1, iteration = ((0 smax (-1 + (-1 * %n))) /u 2)!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -334,12 +334,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -383,12 +383,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [<>]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
diff --git a/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll b/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
index 0fc73aa643..8b2e43f3d8 100644
--- a/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
+++ b/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
@@ -17,12 +17,12 @@ entry:
 for.body.preheader:                               ; preds = %entry
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [0|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %for.body.preheader, %for.body
   %i.03 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
@@ -59,13 +59,13 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [<>] splitable!
 ; CHECK: da analyze - split level = 1, iteration = 0!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -103,12 +103,12 @@ define void @weakcrossing2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -138,12 +138,12 @@ define void @weakcrossing3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [0|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -173,12 +173,12 @@ define void @weakcrossing4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -209,12 +209,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -252,13 +252,13 @@ define void @weakcrossing6(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [<>] splitable!
 ; CHECK: da analyze - split level = 1, iteration = 2!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
diff --git a/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll b/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
index e78bc5a139..bc85e6c8b6 100644
--- a/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
+++ b/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
@@ -13,12 +13,12 @@ define void @weakzerodst0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [p<=|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - consistent input [S]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -50,12 +50,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [p<=|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - consistent input [S]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -92,12 +92,12 @@ define void @weakzerodst2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - consistent input [S]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -127,12 +127,12 @@ define void @weakzerodst3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [=>p|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - consistent input [S]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -162,12 +162,12 @@ define void @weakzerodst4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - flow [*|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - consistent input [S]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -197,12 +197,12 @@ define void @weakzerodst5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - consistent input [S]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -233,12 +233,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [S|<]!
+; CHECK: da analyze - consistent input [S]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
diff --git a/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll b/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
index 2edba66901..2b3b2d00ec 100644
--- a/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
+++ b/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
@@ -13,12 +13,12 @@ define void @weakzerosrc0(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - consistent output [S]!
 ; CHECK: da analyze - flow [p<=|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -50,12 +50,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - consistent output [S]!
 ; CHECK: da analyze - flow [p<=|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
@@ -92,12 +92,12 @@ define void @weakzerosrc2(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - consistent output [S]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -127,12 +127,12 @@ define void @weakzerosrc3(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - consistent output [S]!
 ; CHECK: da analyze - flow [=>p|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -162,12 +162,12 @@ define void @weakzerosrc4(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - consistent output [S]!
 ; CHECK: da analyze - flow [*|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -197,12 +197,12 @@ define void @weakzerosrc5(i32* %A, i32* %B, i64 %n) nounwind uwtable ssp {
 entry:
   br label %for.body
 
-; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - consistent output [S]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body:                                         ; preds = %entry, %for.body
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
@@ -233,12 +233,12 @@ entry:
   %cmp1 = icmp eq i64 %n, 0
   br i1 %cmp1, label %for.end, label %for.body.preheader
 
-; CHECK: da analyze - consistent output [S|<]!
+; CHECK: da analyze - consistent output [S]!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input [0|<]!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output [0|<]!
+; CHECK: da analyze - none!
 
 for.body.preheader:                               ; preds = %entry
   br label %for.body
diff --git a/test/Analysis/DependenceAnalysis/ZIV.ll b/test/Analysis/DependenceAnalysis/ZIV.ll
index 1e833baf28..5463c63ba3 100644
--- a/test/Analysis/DependenceAnalysis/ZIV.ll
+++ b/test/Analysis/DependenceAnalysis/ZIV.ll
@@ -14,12 +14,12 @@ entry:
   %arrayidx = getelementptr inbounds i32* %A, i64 %add
   store i32 0, i32* %arrayidx, align 4
 
-; CHECK: da analyze - consistent output!
-; CHECK: da analyze - consistent flow!
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent flow [|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 
   %add1 = add i64 %n, 1
   %arrayidx2 = getelementptr inbounds i32* %A, i64 %add1
@@ -37,12 +37,12 @@ entry:
   %arrayidx = getelementptr inbounds i32* %A, i64 %n
   store i32 0, i32* %arrayidx, align 4
 
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 
   %add = add i64 %n, 1
   %arrayidx1 = getelementptr inbounds i32* %A, i64 %add
@@ -60,12 +60,12 @@ entry:
   %arrayidx = getelementptr inbounds i32* %A, i64 %n
   store i32 0, i32* %arrayidx, align 4
 
-; CHECK: da analyze - consistent output!
-; CHECK: da analyze - flow!
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [|<]!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent input!
+; CHECK: da analyze - none!
 ; CHECK: da analyze - confused!
-; CHECK: da analyze - consistent output!
+; CHECK: da analyze - none!
 
   %arrayidx1 = getelementptr inbounds i32* %A, i64 %m
   %0 = load i32* %arrayidx1, align 4
diff --git a/test/Assembler/fast-math-flags.ll b/test/Assembler/fast-math-flags.ll
new file mode 100644
index 0000000000..3a116c507f
--- /dev/null
+++ b/test/Assembler/fast-math-flags.ll
@@ -0,0 +1,142 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+; RUN: opt -S < %s | FileCheck %s
+
+@addr   = external global i64
+@select = external global i1
+@vec    = external global <3 x float>
+@arr    = external global [3 x float]
+
+define float @none(float %x, float %y) {
+entry:
+; CHECK:  %vec = load  <3 x float>* @vec
+  %vec    = load  <3 x float>* @vec
+; CHECK:  %select = load i1* @select
+  %select = load i1* @select
+; CHECK:  %arr    = load [3 x float]* @arr
+  %arr    = load [3 x float]* @arr
+
+; CHECK:  %a = fadd  float %x, %y
+  %a = fadd  float %x, %y
+; CHECK:  %a_vec = fadd  <3 x float> %vec, %vec
+  %a_vec = fadd  <3 x float> %vec, %vec
+; CHECK:  %b = fsub  float %x, %y
+  %b = fsub  float %x, %y
+; CHECK:  %b_vec = fsub  <3 x float> %vec, %vec
+  %b_vec = fsub  <3 x float> %vec, %vec
+; CHECK:  %c = fmul  float %x, %y
+  %c = fmul  float %x, %y
+; CHECK:  %c_vec = fmul  <3 x float> %vec, %vec
+  %c_vec = fmul  <3 x float> %vec, %vec
+; CHECK:  %d = fdiv  float %x, %y
+  %d = fdiv  float %x, %y
+; CHECK:  %d_vec = fdiv  <3 x float> %vec, %vec
+  %d_vec = fdiv  <3 x float> %vec, %vec
+; CHECK:  %e = frem  float %x, %y
+  %e = frem  float %x, %y
+; CHECK:  %e_vec = frem  <3 x float> %vec, %vec
+  %e_vec = frem  <3 x float> %vec, %vec
+; CHECK:  ret  float %e
+  ret  float %e
+}
+
+; CHECK: no_nan
+define float @no_nan(float %x, float %y) {
+entry:
+; CHECK:  %vec = load <3 x float>* @vec
+  %vec    = load  <3 x float>* @vec
+; CHECK:  %select = load i1* @select
+  %select = load i1* @select
+; CHECK:  %arr = load  [3 x float]* @arr
+  %arr    = load  [3 x float]* @arr
+
+; CHECK:  %a = fadd nnan  float %x, %y
+  %a = fadd nnan  float %x, %y
+; CHECK:  %a_vec = fadd nnan  <3 x float> %vec, %vec
+  %a_vec = fadd nnan  <3 x float> %vec, %vec
+; CHECK:  %b = fsub nnan  float %x, %y
+  %b = fsub nnan  float %x, %y
+; CHECK:  %b_vec = fsub nnan  <3 x float> %vec, %vec
+  %b_vec = fsub nnan  <3 x float> %vec, %vec
+; CHECK:  %c = fmul nnan  float %x, %y
+  %c = fmul nnan  float %x, %y
+; CHECK:  %c_vec = fmul nnan  <3 x float> %vec, %vec
+  %c_vec = fmul nnan <3 x float> %vec, %vec
+; CHECK:  %d = fdiv nnan  float %x, %y
+  %d = fdiv nnan float %x, %y
+; CHECK:  %d_vec = fdiv nnan  <3 x float> %vec, %vec
+  %d_vec = fdiv nnan <3 x float> %vec, %vec
+; CHECK:  %e = frem nnan  float %x, %y
+  %e = frem nnan  float %x, %y
+; CHECK:  %e_vec = frem nnan  <3 x float> %vec, %vec
+  %e_vec = frem nnan  <3 x float> %vec, %vec
+; CHECK:  ret float %e
+  ret float %e
+}
+
+; CHECK: no_nan_inf
+define float @no_nan_inf(float %x, float %y) {
+entry:
+; CHECK:  %vec = load <3 x float>* @vec
+  %vec    = load <3 x float>* @vec
+; CHECK:  %select = load i1* @select
+  %select = load i1* @select
+; CHECK:  %arr = load [3 x float]* @arr
+  %arr    = load [3 x float]* @arr
+
+; CHECK:  %a = fadd nnan ninf  float %x, %y
+  %a = fadd ninf nnan  float %x, %y
+; CHECK:  %a_vec = fadd nnan  <3 x float> %vec, %vec
+  %a_vec = fadd nnan  <3 x float> %vec, %vec
+; CHECK:  %b = fsub nnan  float %x, %y
+  %b = fsub nnan  float %x, %y
+; CHECK:  %b_vec = fsub nnan ninf  <3 x float> %vec, %vec
+  %b_vec = fsub ninf nnan  <3 x float> %vec, %vec
+; CHECK:  %c = fmul nnan  float %x, %y
+  %c = fmul nnan  float %x, %y
+; CHECK:  %c_vec = fmul nnan  <3 x float> %vec, %vec
+  %c_vec = fmul nnan <3 x float> %vec, %vec
+; CHECK:  %d = fdiv nnan ninf  float %x, %y
+  %d = fdiv ninf nnan float %x, %y
+; CHECK:  %d_vec = fdiv nnan  <3 x float> %vec, %vec
+  %d_vec = fdiv nnan <3 x float> %vec, %vec
+; CHECK:  %e = frem nnan  float %x, %y
+  %e = frem nnan  float %x, %y
+; CHECK:  %e_vec = frem nnan ninf  <3 x float> %vec, %vec
+  %e_vec = frem ninf nnan  <3 x float> %vec, %vec
+; CHECK:  ret  float %e
+  ret  float %e
+}
+
+; CHECK: mixed_flags
+define float @mixed_flags(float %x, float %y) {
+entry:
+; CHECK:  %vec = load <3 x float>* @vec
+  %vec    = load <3 x float>* @vec
+; CHECK:  %select = load i1* @select
+  %select = load i1* @select
+; CHECK:  %arr    = load [3 x float]* @arr
+  %arr    = load [3 x float]* @arr
+
+; CHECK:  %a = fadd nnan ninf float %x, %y
+  %a = fadd ninf nnan float %x, %y
+; CHECK:  %a_vec = fadd nnan <3 x float> %vec, %vec
+  %a_vec = fadd nnan <3 x float> %vec, %vec
+; CHECK:  %b = fsub fast float %x, %y
+  %b = fsub nnan nsz fast float %x, %y
+; CHECK:  %b_vec = fsub nnan <3 x float> %vec, %vec
+  %b_vec = fsub nnan <3 x float> %vec, %vec
+; CHECK:  %c = fmul fast float %x, %y
+  %c = fmul nsz fast arcp float %x, %y
+; CHECK:  %c_vec = fmul nsz <3 x float> %vec, %vec
+  %c_vec = fmul nsz <3 x float> %vec, %vec
+; CHECK:  %d = fdiv nnan ninf arcp float %x, %y
+  %d = fdiv arcp ninf nnan float %x, %y
+; CHECK:  %d_vec = fdiv fast <3 x float> %vec, %vec
+  %d_vec = fdiv fast nnan arcp <3 x float> %vec, %vec
+; CHECK:  %e = frem nnan nsz float %x, %y
+  %e = frem nnan nsz float %x, %y
+; CHECK:  %e_vec = frem nnan <3 x float> %vec, %vec
+  %e_vec = frem nnan <3 x float> %vec, %vec
+; CHECK:  ret  float %e
+  ret  float %e
+}
diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
index 5cfbb4f944..1272a25793 100644
--- a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
+++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
@@ -10,7 +10,8 @@
 @STRIDE = internal global i32 8
 
 ; ASM:          .type   array00,%object         @ @array00
-; ASM-NEXT:     .lcomm  array00,80
+; ASM-NEXT:     .local  array00
+; ASM-NEXT:     .comm   array00,80,1
 ; ASM-NEXT:     .type   _MergedGlobals,%object  @ @_MergedGlobals
 
 
diff --git a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
index 6e0ef96196..f563eeef01 100644
--- a/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
+++ b/test/CodeGen/ARM/2011-10-26-memset-with-neon.ll
@@ -1,13 +1,5 @@
 ; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s
 
-; Should trigger a NEON store.
-; CHECK: vstr
-define void @f_0_12(i8* nocapture %c) nounwind optsize {
-entry:
-  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
-  ret void
-}
-
 ; Trigger multiple NEON stores.
 ; CHECK:      vst1.64
 ; CHECK-NEXT: vst1.64
diff --git a/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
new file mode 100644
index 0000000000..2f55204aa4
--- /dev/null
+++ b/test/CodeGen/ARM/2012-08-23-legalize-vmull.ll
@@ -0,0 +1,150 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+; PR12281
+; Test generataion of code for vmull instruction when multiplying 128-bit
+; vectors that were created by sign-extending smaller vector sizes.
+;
+; The vmull operation requires 64-bit vectors, so we must extend the original
+; vector size to 64 bits for vmull operation.
+; Previously failed with an assertion because the <4 x i8> vector was too small
+; for vmull.
+
+; Vector x Constant
+; v4i8
+;
+define void @sextload_v4i8_c(<4 x i8>* %v) nounwind {
+;CHECK: sextload_v4i8_c:
+entry:
+  %0 = load <4 x i8>* %v, align 8
+  %v0  = sext <4 x i8> %0 to <4 x i32>
+;CHECK: vmull
+  %v1 = mul <4 x i32>  %v0, <i32 3, i32 3, i32 3, i32 3>
+  store <4 x i32> %v1, <4 x i32>* undef, align 8
+  ret void;
+}
+
+; v2i8
+;
+define void @sextload_v2i8_c(<2 x i8>* %v) nounwind {
+;CHECK: sextload_v2i8_c:
+entry:
+  %0   = load <2 x i8>* %v, align 8
+  %v0  = sext <2 x i8>  %0 to <2 x i64>
+;CHECK: vmull
+  %v1  = mul <2 x i64>  %v0, <i64 3, i64 3>
+  store <2 x i64> %v1, <2 x i64>* undef, align 8
+  ret void;
+}
+
+; v2i16
+;
+define void @sextload_v2i16_c(<2 x i16>* %v) nounwind {
+;CHECK: sextload_v2i16_c:
+entry:
+  %0   = load <2 x i16>* %v, align 8
+  %v0  = sext <2 x i16>  %0 to <2 x i64>
+;CHECK: vmull
+  %v1  = mul <2 x i64>  %v0, <i64 3, i64 3>
+  store <2 x i64> %v1, <2 x i64>* undef, align 8
+  ret void;
+}
+
+
+; Vector x Vector
+; v4i8
+;
+define void @sextload_v4i8_v(<4 x i8>* %v, <4 x i8>* %p) nounwind {
+;CHECK: sextload_v4i8_v:
+entry:
+  %0 = load <4 x i8>* %v, align 8
+  %v0  = sext <4 x i8> %0 to <4 x i32>
+
+  %1  = load <4 x i8>* %p, align 8
+  %v2 = sext <4 x i8> %1 to <4 x i32>
+;CHECK: vmull
+  %v1 = mul <4 x i32>  %v0, %v2
+  store <4 x i32> %v1, <4 x i32>* undef, align 8
+  ret void;
+}
+
+; v2i8
+;
+define void @sextload_v2i8_v(<2 x i8>* %v, <2 x i8>* %p) nounwind {
+;CHECK: sextload_v2i8_v:
+entry:
+  %0 = load <2 x i8>* %v, align 8
+  %v0  = sext <2 x i8> %0 to <2 x i64>
+
+  %1  = load <2 x i8>* %p, align 8
+  %v2 = sext <2 x i8> %1 to <2 x i64>
+;CHECK: vmull
+  %v1 = mul <2 x i64>  %v0, %v2
+  store <2 x i64> %v1, <2 x i64>* undef, align 8
+  ret void;
+}
+
+; v2i16
+;
+define void @sextload_v2i16_v(<2 x i16>* %v, <2 x i16>* %p) nounwind {
+;CHECK: sextload_v2i16_v:
+entry:
+  %0 = load <2 x i16>* %v, align 8
+  %v0  = sext <2 x i16> %0 to <2 x i64>
+
+  %1  = load <2 x i16>* %p, align 8
+  %v2 = sext <2 x i16> %1 to <2 x i64>
+;CHECK: vmull
+  %v1 = mul <2 x i64>  %v0, %v2
+  store <2 x i64> %v1, <2 x i64>* undef, align 8
+  ret void;
+}
+
+
+; Vector(small) x Vector(big)
+; v4i8 x v4i16
+;
+define void @sextload_v4i8_vs(<4 x i8>* %v, <4 x i16>* %p) nounwind {
+;CHECK: sextload_v4i8_vs:
+entry:
+  %0 = load <4 x i8>* %v, align 8
+  %v0  = sext <4 x i8> %0 to <4 x i32>
+
+  %1  = load <4 x i16>* %p, align 8
+  %v2 = sext <4 x i16> %1 to <4 x i32>
+;CHECK: vmull
+  %v1 = mul <4 x i32>  %v0, %v2
+  store <4 x i32> %v1, <4 x i32>* undef, align 8
+  ret void;
+}
+
+; v2i8
+; v2i8 x v2i16
+define void @sextload_v2i8_vs(<2 x i8>* %v, <2 x i16>* %p) nounwind {
+;CHECK: sextload_v2i8_vs:
+entry:
+  %0 = load <2 x i8>* %v, align 8
+  %v0  = sext <2 x i8> %0 to <2 x i64>
+
+  %1  = load <2 x i16>* %p, align 8
+  %v2 = sext <2 x i16> %1 to <2 x i64>
+;CHECK: vmull
+  %v1 = mul <2 x i64>  %v0, %v2
+  store <2 x i64> %v1, <2 x i64>* undef, align 8
+  ret void;
+}
+
+; v2i16
+; v2i16 x v2i32
+define void @sextload_v2i16_vs(<2 x i16>* %v, <2 x i32>* %p) nounwind {
+;CHECK: sextload_v2i16_vs:
+entry:
+  %0 = load <2 x i16>* %v, align 8
+  %v0  = sext <2 x i16> %0 to <2 x i64>
+
+  %1  = load <2 x i32>* %p, align 8
+  %v2 = sext <2 x i32> %1 to <2 x i64>
+;CHECK: vmull
+  %v1 = mul <2 x i64>  %v0, %v2
+  store <2 x i64> %v1, <2 x i64>* undef, align 8
+  ret void;
+}
diff --git a/test/CodeGen/ARM/alloc-no-stack-realign.ll b/test/CodeGen/ARM/alloc-no-stack-realign.ll
new file mode 100644
index 0000000000..273041dee3
--- /dev/null
+++ b/test/CodeGen/ARM/alloc-no-stack-realign.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios -O0 -realign-stack=0 | FileCheck %s -check-prefix=NO-REALIGN
+; RUN: llc < %s -mtriple=armv7-apple-ios -O0 | FileCheck %s
+
+; rdar://12713765
+; When realign-stack is set to false, make sure we are not creating stack
+; objects that are assumed to be 64-byte aligned.
+@T3_retval = common global <16 x float> zeroinitializer, align 16
+
+define void @test(<16 x float>* noalias sret %agg.result) nounwind ssp {
+entry:
+; CHECK: test
+; CHECK: bic sp, sp, #63
+; CHECK: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
+; CHECK: vst1.64
+; CHECK: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
+; CHECK: vst1.64
+; CHECK: orr [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
+; CHECK: vst1.64
+; CHECK: vst1.64
+; CHECK: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
+; CHECK: vst1.64
+; CHECK: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
+; CHECK: vst1.64
+; CHECK: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
+; CHECK: vst1.64
+; CHECK: vst1.64
+; NO-REALIGN: test
+; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
+; NO-REALIGN: vst1.64
+; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
+; NO-REALIGN: vst1.64
+; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
+; NO-REALIGN: vst1.64
+; NO-REALIGN: vst1.64
+; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #48
+; NO-REALIGN: vst1.64
+; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #32
+; NO-REALIGN: vst1.64
+; NO-REALIGN: add [[R2:r[0-9]+]], [[R1:r[0-9]+]], #16
+; NO-REALIGN: vst1.64
+; NO-REALIGN: vst1.64
+ %retval = alloca <16 x float>, align 16
+ %0 = load <16 x float>* @T3_retval, align 16
+ store <16 x float> %0, <16 x float>* %retval
+ %1 = load <16 x float>* %retval
+ store <16 x float> %1, <16 x float>* %agg.result, align 16
+ ret void
+}
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index be51e3c129..69da6221b7 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -126,3 +126,64 @@ define void @test9(i64* %ptr, i64 %val) {
   store atomic i64 %val, i64* %ptr seq_cst, align 8
   ret void
 }
+
+define i64 @test10(i64* %ptr, i64 %val) {
+; CHECK: test10:
+; CHECK: dmb ish
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
+; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
+; CHECK: ble
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK: cmp
+; CHECK: bne
+; CHECK: dmb ish
+  %r = atomicrmw min i64* %ptr, i64 %val seq_cst
+  ret i64 %r
+}
+
+define i64 @test11(i64* %ptr, i64 %val) {
+; CHECK: test11:
+; CHECK: dmb ish
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
+; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
+; CHECK: bls
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK: cmp
+; CHECK: bne
+; CHECK: dmb ish
+  %r = atomicrmw umin i64* %ptr, i64 %val seq_cst
+  ret i64 %r
+}
+
+define i64 @test12(i64* %ptr, i64 %val) {
+; CHECK: test12:
+; CHECK: dmb ish
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
+; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
+; CHECK: bge
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK: cmp
+; CHECK: bne
+; CHECK: dmb ish
+  %r = atomicrmw max i64* %ptr, i64 %val seq_cst
+  ret i64 %r
+}
+
+define i64 @test13(i64* %ptr, i64 %val) {
+; CHECK: test13:
+; CHECK: dmb ish
+; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
+; CHECK: subs {{[a-z0-9]+}}, [[REG1]], [[REG3:(r[0-9]?[02468])]]
+; CHECK: sbcs {{[a-z0-9]+}}, [[REG2]], [[REG4:(r[0-9]?[13579])]]
+; CHECK: bhs
+; CHECK: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK: cmp
+; CHECK: bne
+; CHECK: dmb ish
+  %r = atomicrmw umax i64* %ptr, i64 %val seq_cst
+  ret i64 %r
+}
+
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
index 5bdad1d838..e7bd5f41bb 100644
--- a/test/CodeGen/ARM/coalesce-subregs.ll
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -317,3 +317,44 @@ if.end4:                                          ; preds = %if.else3, %if.then2
   store <2 x i64> %result.2, <2 x i64>* %agg.result, align 128
   ret void
 }
+
+; <rdar://problem/12758887>
+; RegisterCoalescer::updateRegDefsUses() could visit an instruction more than
+; once under rare circumstances. When widening a register from QPR to DTriple
+; with the original virtual register in dsub_1_dsub_2, the double rewrite would
+; produce an invalid sub-register.
+;
+; This is because dsub_1_dsub_2 is not an idempotent sub-register index.
+; It will translate %vr:dsub_0 -> %vr:dsub_1.
+define hidden fastcc void @radar12758887() nounwind optsize ssp {
+entry:
+  br i1 undef, label %for.body, label %for.end70
+
+for.body:                                         ; preds = %for.end, %entry
+  br i1 undef, label %for.body29, label %for.end
+
+for.body29:                                       ; preds = %for.body29, %for.body
+  %0 = load <2 x double>* null, align 1
+  %splat40 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer
+  %mul41 = fmul <2 x double> undef, %splat40
+  %add42 = fadd <2 x double> undef, %mul41
+  %splat44 = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> <i32 1, i32 1>
+  %mul45 = fmul <2 x double> undef, %splat44
+  %add46 = fadd <2 x double> undef, %mul45
+  br i1 undef, label %for.end, label %for.body29
+
+for.end:                                          ; preds = %for.body29, %for.body
+  %accumR2.0.lcssa = phi <2 x double> [ zeroinitializer, %for.body ], [ %add42, %for.body29 ]
+  %accumI2.0.lcssa = phi <2 x double> [ zeroinitializer, %for.body ], [ %add46, %for.body29 ]
+  %1 = shufflevector <2 x double> %accumI2.0.lcssa, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+  %add58 = fadd <2 x double> undef, %1
+  %mul61 = fmul <2 x double> %add58, undef
+  %add63 = fadd <2 x double> undef, %mul61
+  %add64 = fadd <2 x double> undef, %add63
+  %add67 = fadd <2 x double> undef, %add64
+  store <2 x double> %add67, <2 x double>* undef, align 1
+  br i1 undef, label %for.end70, label %for.body
+
+for.end70:                                        ; preds = %for.end, %entry
+  ret void
+}
diff --git a/test/CodeGen/ARM/crash.ll b/test/CodeGen/ARM/crash.ll
index 0f6f33e044..4e3e2010b0 100644
--- a/test/CodeGen/ARM/crash.ll
+++ b/test/CodeGen/ARM/crash.ll
@@ -69,3 +69,26 @@ bb:
   store <4 x float> %tmp154, <4 x float>* undef, align 16
   ret void
 }
+
+; <rdar://problem/12721258>
+%A = type { %B }
+%B = type { i32 }
+
+define void @_Z3Foov() ssp {
+entry:
+  br i1 true, label %exit, label %false
+
+false:
+  invoke void undef(%A* undef)
+          to label %exit unwind label %lpad
+
+lpad:
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*)
+          catch i8* null
+  unreachable
+
+exit:
+  ret void
+}
+
+declare i32 @__gxx_personality_sj0(...)
diff --git a/test/CodeGen/ARM/debug-info-branch-folding.ll b/test/CodeGen/ARM/debug-info-branch-folding.ll
index 4f4ff8e817..a3a1fc0b2f 100644
--- a/test/CodeGen/ARM/debug-info-branch-folding.ll
+++ b/test/CodeGen/ARM/debug-info-branch-folding.ll
@@ -52,7 +52,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
 !7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 589857, i64 0, i64 3}         ; [ DW_TAG_subrange_type ]
+!9 = metadata !{i32 589857, i64 0, i64 4}         ; [ DW_TAG_subrange_type ]
 !10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**, i1)* @main, null} ; [ DW_TAG_subprogram ]
 !11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !12 = metadata !{metadata !13}
diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll
index 97c9c66c58..c6bfe3a25e 100644
--- a/test/CodeGen/ARM/debug-info-qreg.ll
+++ b/test/CodeGen/ARM/debug-info-qreg.ll
@@ -49,7 +49,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ]
 !7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !8 = metadata !{metadata !9}
-!9 = metadata !{i32 589857, i64 0, i64 3}         ; [ DW_TAG_subrange_type ]
+!9 = metadata !{i32 589857, i64 0, i64 4}         ; [ DW_TAG_subrange_type ]
 !10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ]
 !11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !12 = metadata !{metadata !13}
diff --git a/test/CodeGen/ARM/domain-conv-vmovs.ll b/test/CodeGen/ARM/domain-conv-vmovs.ll
index 0ebac94e13..b5586cc99f 100644
--- a/test/CodeGen/ARM/domain-conv-vmovs.ll
+++ b/test/CodeGen/ARM/domain-conv-vmovs.ll
@@ -78,7 +78,7 @@ define float @test_ineligible(float, float %in) {
   ; use-def chains would be messed up. Primarily a compile-test (we used to
   ; internal fault).
   call void @bar()
-; CHECL: bl bar
+; CHECK: bl bar
 ; CHECK: vext.32
 ; CHECK: vext.32
   ret float %val
diff --git a/test/CodeGen/ARM/elf-lcomm-align.ll b/test/CodeGen/ARM/elf-lcomm-align.ll
index 46792990e5..a98b3c06f5 100644
--- a/test/CodeGen/ARM/elf-lcomm-align.ll
+++ b/test/CodeGen/ARM/elf-lcomm-align.ll
@@ -4,8 +4,9 @@
 @c = internal global i8 0, align 1
 @x = internal global i32 0, align 4
 
-; CHECK: .lcomm c,1
-; .lcomm doesn't support alignment.
+; .lcomm doesn't support alignment, so we always use .local/.comm.
+; CHECK: .local c
+; CHECK-NEXT: .comm c,1,1
 ; CHECK: .local x
 ; CHECK-NEXT: .comm x,4,4
 
diff --git a/test/CodeGen/ARM/extload-knownzero.ll b/test/CodeGen/ARM/extload-knownzero.ll
new file mode 100644
index 0000000000..8fd6b6bd77
--- /dev/null
+++ b/test/CodeGen/ARM/extload-knownzero.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; rdar://12771555
+
+define void @foo(i16* %ptr, i32 %a) nounwind {
+entry:
+; CHECK: foo:
+  %tmp1 = icmp ult i32 %a, 100
+  br i1 %tmp1, label %bb1, label %bb2
+bb1:
+; CHECK: ldrh
+  %tmp2 = load i16* %ptr, align 2
+  br label %bb2
+bb2:
+; CHECK-NOT: uxth
+; CHECK: cmp
+  %tmp3 = phi i16 [ 0, %entry ], [ %tmp2, %bb1 ]
+  %cmp = icmp ult i16 %tmp3, 24
+  br i1 %cmp, label %bb3, label %exit
+bb3:
+  call void @bar() nounwind
+  br label %exit
+exit:
+  ret void
+}
+
+declare void @bar () 
diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll
index 8764bef7da..8357ed5c54 100644
--- a/test/CodeGen/ARM/fast-isel-icmp.ll
+++ b/test/CodeGen/ARM/fast-isel-icmp.ll
@@ -1,6 +1,21 @@
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
 ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
+define i32 @icmp_i16_signed(i16 %a, i16 %b) nounwind {
+entry:
+; ARM: icmp_i16_signed
+; ARM: sxth r0, r0
+; ARM: sxth r1, r1
+; ARM: cmp	r0, r1
+; THUMB: icmp_i16_signed
+; THUMB: sxth r0, r0
+; THUMB: sxth r1, r1
+; THUMB: cmp	r0, r1
+  %cmp = icmp slt i16 %a, %b
+  %conv2 = zext i1 %cmp to i32
+  ret i32 %conv2
+}
+
 define i32 @icmp_i16_unsigned(i16 %a, i16 %b) nounwind {
 entry:
 ; ARM: icmp_i16_unsigned
@@ -31,6 +46,21 @@ entry:
   ret i32 %conv2
 }
 
+define i32 @icmp_i8_unsigned(i8 %a, i8 %b) nounwind {
+entry:
+; ARM: icmp_i8_unsigned
+; ARM: uxtb r0, r0
+; ARM: uxtb r1, r1
+; ARM: cmp r0, r1
+; THUMB: icmp_i8_unsigned
+; THUMB: uxtb r0, r0
+; THUMB: uxtb r1, r1
+; THUMB: cmp r0, r1
+  %cmp = icmp ugt i8 %a, %b
+  %conv2 = zext i1 %cmp to i32
+  ret i32 %conv2
+}
+
 define i32 @icmp_i1_unsigned(i1 %a, i1 %b) nounwind {
 entry:
 ; ARM: icmp_i1_unsigned
diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll
index be8035ec79..ebc0e8426d 100644
--- a/test/CodeGen/ARM/fast-isel-indirectbr.ll
+++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
-; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB
 
 define void @t1(i8* %x) {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll
index b73fceff6c..7d38cc2a7f 100644
--- a/test/CodeGen/ARM/fast-isel-intrinsic.ll
+++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll
@@ -35,7 +35,7 @@ define void @t1() nounwind ssp {
 ; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr
 ; THUMB-LONG: ldr r3, [r3]
 ; THUMB-LONG: blx r3
-  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 1, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 4, i1 false)
   ret void
 }
 
@@ -73,7 +73,7 @@ define void @t2() nounwind ssp {
 ; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr
 ; THUMB-LONG: ldr r3, [r3]
 ; THUMB-LONG: blx r3
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 4, i1 false)
   ret void
 }
 
@@ -125,6 +125,7 @@ define void @t4() nounwind ssp {
 ; ARM: ldrh r1, [r0, #24]
 ; ARM: strh r1, [r0, #12]
 ; ARM: bx lr
+; THUMB: t4
 ; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
 ; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
 ; THUMB: ldr r0, [r0]
@@ -135,8 +136,98 @@ define void @t4() nounwind ssp {
 ; THUMB: ldrh r1, [r0, #24]
 ; THUMB: strh r1, [r0, #12]
 ; THUMB: bx lr
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 4, i1 false)
   ret void
 }
 
 declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define void @t5() nounwind ssp {
+; ARM: t5
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: ldrh r1, [r0, #16]
+; ARM: strh r1, [r0, #4]
+; ARM: ldrh r1, [r0, #18]
+; ARM: strh r1, [r0, #6]
+; ARM: ldrh r1, [r0, #20]
+; ARM: strh r1, [r0, #8]
+; ARM: ldrh r1, [r0, #22]
+; ARM: strh r1, [r0, #10]
+; ARM: ldrh r1, [r0, #24]
+; ARM: strh r1, [r0, #12]
+; ARM: bx lr
+; THUMB: t5
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: ldrh r1, [r0, #16]
+; THUMB: strh r1, [r0, #4]
+; THUMB: ldrh r1, [r0, #18]
+; THUMB: strh r1, [r0, #6]
+; THUMB: ldrh r1, [r0, #20]
+; THUMB: strh r1, [r0, #8]
+; THUMB: ldrh r1, [r0, #22]
+; THUMB: strh r1, [r0, #10]
+; THUMB: ldrh r1, [r0, #24]
+; THUMB: strh r1, [r0, #12]
+; THUMB: bx lr
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 2, i1 false)
+  ret void
+}
+
+define void @t6() nounwind ssp {
+; ARM: t6
+; ARM: movw r0, :lower16:L_temp$non_lazy_ptr
+; ARM: movt r0, :upper16:L_temp$non_lazy_ptr
+; ARM: ldr r0, [r0]
+; ARM: ldrb r1, [r0, #16]
+; ARM: strb r1, [r0, #4]
+; ARM: ldrb r1, [r0, #17]
+; ARM: strb r1, [r0, #5]
+; ARM: ldrb r1, [r0, #18]
+; ARM: strb r1, [r0, #6]
+; ARM: ldrb r1, [r0, #19]
+; ARM: strb r1, [r0, #7]
+; ARM: ldrb r1, [r0, #20]
+; ARM: strb r1, [r0, #8]
+; ARM: ldrb r1, [r0, #21]
+; ARM: strb r1, [r0, #9]
+; ARM: ldrb r1, [r0, #22]
+; ARM: strb r1, [r0, #10]
+; ARM: ldrb r1, [r0, #23]
+; ARM: strb r1, [r0, #11]
+; ARM: ldrb r1, [r0, #24]
+; ARM: strb r1, [r0, #12]
+; ARM: ldrb r1, [r0, #25]
+; ARM: strb r1, [r0, #13]
+; ARM: bx lr
+; THUMB: t6
+; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr
+; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr
+; THUMB: ldr r0, [r0]
+; THUMB: ldrb r1, [r0, #16]
+; THUMB: strb r1, [r0, #4]
+; THUMB: ldrb r1, [r0, #17]
+; THUMB: strb r1, [r0, #5]
+; THUMB: ldrb r1, [r0, #18]
+; THUMB: strb r1, [r0, #6]
+; THUMB: ldrb r1, [r0, #19]
+; THUMB: strb r1, [r0, #7]
+; THUMB: ldrb r1, [r0, #20]
+; THUMB: strb r1, [r0, #8]
+; THUMB: ldrb r1, [r0, #21]
+; THUMB: strb r1, [r0, #9]
+; THUMB: ldrb r1, [r0, #22]
+; THUMB: strb r1, [r0, #10]
+; THUMB: ldrb r1, [r0, #23]
+; THUMB: strb r1, [r0, #11]
+; THUMB: ldrb r1, [r0, #24]
+; THUMB: strb r1, [r0, #12]
+; THUMB: ldrb r1, [r0, #25]
+; THUMB: strb r1, [r0, #13]
+; THUMB: bx lr
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false)
+  ret void
+}
diff --git a/test/CodeGen/ARM/fast-isel-pred.ll b/test/CodeGen/ARM/fast-isel-pred.ll
index 8de54ad533..27731def1f 100644
--- a/test/CodeGen/ARM/fast-isel-pred.ll
+++ b/test/CodeGen/ARM/fast-isel-pred.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -mtriple=armv7-apple-darwin < %s
+; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-apple-darwin < %s
 
 define i32 @main() nounwind ssp {
 entry:
diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll
index e50c3a4954..563880dab0 100644
--- a/test/CodeGen/ARM/fast-isel-redefinition.ll
+++ b/test/CodeGen/ARM/fast-isel-redefinition.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O0 -optimize-regalloc -regalloc=basic < %s
+; RUN: llc -O0 -verify-machineinstrs -optimize-regalloc -regalloc=basic < %s
 ; This isn't exactly a useful set of command-line options, but check that it
 ; doesn't crash.  (It was crashing because a register was getting redefined.)
 
diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll
index a86e3251f7..e8759a7fc4 100644
--- a/test/CodeGen/ARM/fast-isel-static.ll
+++ b/test/CodeGen/ARM/fast-isel-static.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -relocation-model=static | FileCheck -check-prefix=NORM %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static | FileCheck -check-prefix=NORM %s
 
 define void @myadd(float* %sum, float* %addend) nounwind {
 entry:
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
index 3ac7d77d6f..03abd762a2 100644
--- a/test/CodeGen/ARM/machine-cse-cmp.ll
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -45,3 +45,35 @@ for.cond1.preheader:                              ; preds = %entry
 }
 
 declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+; rdar://12462006
+define i8* @f3(i8* %base, i32* nocapture %offset, i32 %size) nounwind {
+entry:
+; CHECK: f3:
+; CHECK-NOT: sub
+; CHECK: cmp
+; CHECK: blt
+%0 = load i32* %offset, align 4
+%cmp = icmp slt i32 %0, %size
+%s = sub nsw i32 %0, %size
+%size2 = sub nsw i32 %size, 0
+br i1 %cmp, label %return, label %if.end
+
+if.end:
+; We are checking cse between %sub here and %s in entry block.
+%sub = sub nsw i32 %0, %size2
+%s2 = sub nsw i32 %s, %size
+%s3 = sub nsw i32 %sub, %s2
+; CHECK: sub [[R1:r[0-9]+]], [[R2:r[0-9]+]], r2
+; CHECK: sub [[R3:r[0-9]+]], [[R1]], r2
+; CHECK: sub [[R4:r[0-9]+]], [[R1]], [[R3]]
+; CHECK-NOT: sub
+; CHECK: str
+store i32 %s3, i32* %offset, align 4
+%add.ptr = getelementptr inbounds i8* %base, i32 %sub
+br label %return
+
+return:
+%retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ]
+ret i8* %retval.0
+}
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index dc772827f2..d846e5cb26 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -1,18 +1,115 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-post-ra | FileCheck %s
-
-; CHECK: ldrd
-; CHECK: strd
-; CHECK: ldrb
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s
 
 %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
 
 @src = external global %struct.x
 @dst = external global %struct.x
 
-define i32 @t() {
+@.str1 = private unnamed_addr constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 1
+@.str2 = private unnamed_addr constant [36 x i8] c"DHRYSTONE PROGRAM, SOME STRING BLAH\00", align 1
+@.str3 = private unnamed_addr constant [24 x i8] c"DHRYSTONE PROGRAM, SOME\00", align 1
+@.str4 = private unnamed_addr constant [18 x i8] c"DHRYSTONE PROGR  \00", align 1
+@.str5 = private unnamed_addr constant [7 x i8] c"DHRYST\00", align 1
+@.str6 = private unnamed_addr constant [14 x i8] c"/tmp/rmXXXXXX\00", align 1
+@spool.splbuf = internal global [512 x i8] zeroinitializer, align 16
+
+define i32 @t0() {
 entry:
+; CHECK: t0:
+; CHECK: vldr [[REG1:d[0-9]+]],
+; CHECK: vstr [[REG1]], 
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
   ret i32 0
 }
 
+define void @t1(i8* nocapture %C) nounwind {
+entry:
+; CHECK: t1:
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+; CHECK: adds r0, #15
+; CHECK: adds r1, #15
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false)
+  ret void
+}
+
+define void @t2(i8* nocapture %C) nounwind {
+entry:
+; CHECK: t2:
+; CHECK: ldr [[REG2:r[0-9]+]], [r1, #32]
+; CHECK: str [[REG2]], [r0, #32]
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+; CHECK: adds r0, #16
+; CHECK: adds r1, #16
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false)
+  ret void
+}
+
+define void @t3(i8* nocapture %C) nounwind {
+entry:
+; CHECK: t3:
+; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+; CHECK: adds r0, #16
+; CHECK: adds r1, #16
+; CHECK: vld1.8 {d{{[0-9]+}}}, [r1]
+; CHECK: vst1.8 {d{{[0-9]+}}}, [r0]
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false)
+  ret void
+}
+
+define void @t4(i8* nocapture %C) nounwind {
+entry:
+; CHECK: t4:
+; CHECK: vld1.8 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1]
+; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false)
+  ret void
+}
+
+define void @t5(i8* nocapture %C) nounwind {
+entry:
+; CHECK: t5:
+; CHECK: movs [[REG5:r[0-9]+]], #0
+; CHECK: strb [[REG5]], [r0, #6]
+; CHECK: movw [[REG6:r[0-9]+]], #21587
+; CHECK: strh [[REG6]], [r0, #4]
+; CHECK: ldr [[REG7:r[0-9]+]], 
+; CHECK: str [[REG7]]
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false)
+  ret void
+}
+
+define void @t6() nounwind {
+entry:
+; CHECK: t6:
+; CHECK: vld1.8 {[[REG8:d[0-9]+]]}, [r0]
+; CHECK: vstr [[REG8]], [r1]
+; CHECK: adds r1, #6
+; CHECK: adds r0, #6
+; CHECK: vld1.8
+; CHECK: vst1.16
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([512 x i8]* @spool.splbuf, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8]* @.str6, i64 0, i64 0), i64 14, i32 1, i1 false)
+  ret void
+}
+
+%struct.Foo = type { i32, i32, i32, i32 }
+
+define void @t7(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
+entry:
+; CHECK: t7
+; CHECK: vld1.32
+; CHECK: vst1.32
+  %0 = bitcast %struct.Foo* %a to i8*
+  %1 = bitcast %struct.Foo* %b to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
+  ret void
+}
+
 declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/memset-inline.ll b/test/CodeGen/ARM/memset-inline.ll
new file mode 100644
index 0000000000..ee8c364338
--- /dev/null
+++ b/test/CodeGen/ARM/memset-inline.ll
@@ -0,0 +1,30 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s
+
+define void @t1(i8* nocapture %c) nounwind optsize {
+entry:
+; CHECK: t1:
+; CHECK: movs r1, #0
+; CHECK: str r1, [r0]
+; CHECK: str r1, [r0, #4]
+; CHECK: str r1, [r0, #8]
+  call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
+  ret void
+}
+
+define void @t2() nounwind ssp {
+entry:
+; CHECK: t2:
+; CHECK: add.w r1, r0, #10
+; CHECK: vmov.i32 {{q[0-9]+}}, #0x0
+; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
+; CHECK: vst1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]
+  %buf = alloca [26 x i8], align 1
+  %0 = getelementptr inbounds [26 x i8]* %buf, i32 0, i32 0
+  call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false)
+  call void @something(i8* %0) nounwind
+  ret void
+}
+
+declare void @something(i8*) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/popcnt.ll b/test/CodeGen/ARM/popcnt.ll
new file mode 100644
index 0000000000..0b9c9467c2
--- /dev/null
+++ b/test/CodeGen/ARM/popcnt.ll
@@ -0,0 +1,191 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; Implement ctpop with vcnt
+
+define <8 x i8> @vcnt8(<8 x i8>* %A) nounwind {
+;CHECK: vcnt8:
+;CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %tmp1)
+	ret <8 x i8> %tmp2
+}
+
+define <16 x i8> @vcntQ8(<16 x i8>* %A) nounwind {
+;CHECK: vcntQ8:
+;CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %tmp1)
+	ret <16 x i8> %tmp2
+}
+
+define <4 x i16> @vcnt16(<4 x i16>* %A) nounwind {
+; CHECK: vcnt16:
+; CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vrev16.8 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vuzp.8 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+define <8 x i16> @vcntQ16(<8 x i16>* %A) nounwind {
+; CHECK: vcntQ16:
+; CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vrev16.8 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vuzp.8 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %tmp1)
+	ret <8 x i16> %tmp2
+}
+
+define <2 x i32> @vcnt32(<2 x i32>* %A) nounwind {
+; CHECK: vcnt32:
+; CHECK: vcnt.8 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vrev16.8 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vadd.i8 {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vuzp.8 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
+; CHECK: vrev32.16 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vuzp.16 {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+define <4 x i32> @vcntQ32(<4 x i32>* %A) nounwind {
+; CHECK: vcntQ32:
+; CHECK: vcnt.8 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vrev16.8 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vadd.i8 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vuzp.8 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
+; CHECK: vrev32.16 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vuzp.16 {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %tmp1)
+	ret <4 x i32> %tmp2
+}
+
+declare <8 x i8>  @llvm.ctpop.v8i8(<8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) nounwind readnone
+declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
+
+define <8 x i8> @vclz8(<8 x i8>* %A) nounwind {
+;CHECK: vclz8:
+;CHECK: vclz.i8 {{d[0-9]+}}, {{d[0-9]+}}
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %tmp1, i1 0)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclz16(<4 x i16>* %A) nounwind {
+;CHECK: vclz16:
+;CHECK: vclz.i16 {{d[0-9]+}}, {{d[0-9]+}}
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %tmp1, i1 0)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclz32(<2 x i32>* %A) nounwind {
+;CHECK: vclz32:
+;CHECK: vclz.i32 {{d[0-9]+}}, {{d[0-9]+}}
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %tmp1, i1 0)
+	ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclzQ8(<16 x i8>* %A) nounwind {
+;CHECK: vclzQ8:
+;CHECK: vclz.i8 {{q[0-9]+}}, {{q[0-9]+}}
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %tmp1, i1 0)
+	ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclzQ16(<8 x i16>* %A) nounwind {
+;CHECK: vclzQ16:
+;CHECK: vclz.i16 {{q[0-9]+}}, {{q[0-9]+}}
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %tmp1, i1 0)
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclzQ32(<4 x i32>* %A) nounwind {
+;CHECK: vclzQ32:
+;CHECK: vclz.i32 {{q[0-9]+}}, {{q[0-9]+}}
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %tmp1, i1 0)
+	ret <4 x i32> %tmp2
+}
+
+declare <8 x i8>  @llvm.ctlz.v8i8(<8 x i8>, i1) nounwind readnone
+declare <4 x i16> @llvm.ctlz.v4i16(<4 x i16>, i1) nounwind readnone
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
+
+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) nounwind readnone
+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) nounwind readnone
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
+
+define <8 x i8> @vclss8(<8 x i8>* %A) nounwind {
+;CHECK: vclss8:
+;CHECK: vcls.s8
+	%tmp1 = load <8 x i8>* %A
+	%tmp2 = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %tmp1)
+	ret <8 x i8> %tmp2
+}
+
+define <4 x i16> @vclss16(<4 x i16>* %A) nounwind {
+;CHECK: vclss16:
+;CHECK: vcls.s16
+	%tmp1 = load <4 x i16>* %A
+	%tmp2 = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %tmp1)
+	ret <4 x i16> %tmp2
+}
+
+define <2 x i32> @vclss32(<2 x i32>* %A) nounwind {
+;CHECK: vclss32:
+;CHECK: vcls.s32
+	%tmp1 = load <2 x i32>* %A
+	%tmp2 = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %tmp1)
+	ret <2 x i32> %tmp2
+}
+
+define <16 x i8> @vclsQs8(<16 x i8>* %A) nounwind {
+;CHECK: vclsQs8:
+;CHECK: vcls.s8
+	%tmp1 = load <16 x i8>* %A
+	%tmp2 = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %tmp1)
+	ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @vclsQs16(<8 x i16>* %A) nounwind {
+;CHECK: vclsQs16:
+;CHECK: vcls.s16
+	%tmp1 = load <8 x i16>* %A
+	%tmp2 = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %tmp1)
+	ret <8 x i16> %tmp2
+}
+
+define <4 x i32> @vclsQs32(<4 x i32>* %A) nounwind {
+;CHECK: vclsQs32:
+;CHECK: vcls.s32
+	%tmp1 = load <4 x i32>* %A
+	%tmp2 = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %tmp1)
+	ret <4 x i32> %tmp2
+}
+
+declare <8 x i8>  @llvm.arm.neon.vcls.v8i8(<8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32>) nounwind readnone
+
+declare <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/ARM/reg_asc_order.ll b/test/CodeGen/ARM/reg_asc_order.ll
deleted file mode 100644
index d1d0ee5f3e..0000000000
--- a/test/CodeGen/ARM/reg_asc_order.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
-; Check that memcpy gets lowered to ldm/stm, at least in this very smple case.
-
-%struct.Foo = type { i32, i32, i32, i32 }
-
-define void @_Z10CopyStructP3FooS0_(%struct.Foo* nocapture %a, %struct.Foo* nocapture %b) nounwind {
-entry:
-;CHECK: ldm
-;CHECK: stm
-  %0 = bitcast %struct.Foo* %a to i8*
-  %1 = bitcast %struct.Foo* %b to i8*
-  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 16, i32 4, i1 false)
-  ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/ret_sret_vector.ll b/test/CodeGen/ARM/ret_sret_vector.ll
new file mode 100644
index 0000000000..9bb3519555
--- /dev/null
+++ b/test/CodeGen/ARM/ret_sret_vector.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios3.0.0"
+
+define <4 x double> @PR14337(<4 x double> %a, <4 x double> %b) {
+  %foo = fadd <4 x double>  %a, %b
+  ret <4 x double> %foo
+; CHECK: PR14337:
+; CHECK: vst1.64
+; CHECK: vst1.64
+}
diff --git a/test/CodeGen/ARM/subreg-remat.ll b/test/CodeGen/ARM/subreg-remat.ll
index 455bfce0f2..1bc0315354 100644
--- a/test/CodeGen/ARM/subreg-remat.ll
+++ b/test/CodeGen/ARM/subreg-remat.ll
@@ -12,7 +12,7 @@ target triple = "thumbv7-apple-ios"
 ;
 ; CHECK: f1
 ; CHECK: vmov    d0, r0, r0
-; CHECK: vldr s0, LCPI
+; CHECK: vldr s1, LCPI
 ; The vector must be spilled:
 ; CHECK: vstr d0,
 ; CHECK: asm clobber d0
@@ -20,8 +20,8 @@ target triple = "thumbv7-apple-ios"
 ; CHECK: vldr [[D16:d[0-9]+]],
 ; CHECK: vstr [[D16]], [r1]
 define void @f1(float %x, <2 x float>* %p) {
-  %v1 = insertelement <2 x float> undef, float %x, i32 1
-  %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 0
+  %v1 = insertelement <2 x float> undef, float %x, i32 0
+  %v2 = insertelement <2 x float> %v1, float 0x400921FB60000000, i32 1
   %y = call double asm sideeffect "asm clobber $0", "=w,0,~{d1},~{d2},~{d3},~{d4},~{d5},~{d6},~{d7},~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15},~{d16},~{d17},~{d18},~{d19},~{d20},~{d21},~{d22},~{d23},~{d24},~{d25},~{d26},~{d27},~{d28},~{d29},~{d30},~{d31}"(<2 x float> %v2) nounwind
   store <2 x float> %v2, <2 x float>* %p, align 8
   ret void
diff --git a/test/CodeGen/Hexagon/args.ll b/test/CodeGen/Hexagon/args.ll
index 8a6efb620e..767a442612 100644
--- a/test/CodeGen/Hexagon/args.ll
+++ b/test/CodeGen/Hexagon/args.ll
@@ -1,6 +1,5 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s
-; CHECK: r[[T0:[0-9]+]] = #7
-; CHECK: memw(r29 + #0) = r[[T0]]
+; CHECK: memw(r29{{ *}}+{{ *}}#0){{ *}}={{ *}}#7
 ; CHECK: r5 = #6
 ; CHECK: r0 = #1
 ; CHECK: r1 = #2
diff --git a/test/CodeGen/Hexagon/dualstore.ll b/test/CodeGen/Hexagon/dualstore.ll
index 9b27dda52c..067499530f 100644
--- a/test/CodeGen/Hexagon/dualstore.ll
+++ b/test/CodeGen/Hexagon/dualstore.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 ; Check that we generate dual stores in one packet in V4
 
-; CHECK: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}}
-; CHECK-NEXT: memw(r{{[0-9]+}} + #{{[0-9]+}}) = r{{[0-9]+}}
+; CHECK: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}#100000
+; CHECK-NEXT: memw(r{{[0-9]+}}{{ *}}+{{ *}}#{{[0-9]+}}){{ *}}={{ *}}#500000
 ; CHECK-NEXT: }
 
 @Reg = global i32 0, align 4
diff --git a/test/CodeGen/Hexagon/postinc-load.ll b/test/CodeGen/Hexagon/postinc-load.ll
index 4b5ea67090..855a347d74 100644
--- a/test/CodeGen/Hexagon/postinc-load.ll
+++ b/test/CodeGen/Hexagon/postinc-load.ll
@@ -1,4 +1,4 @@
-; RUN: true || llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
 
 ; Check that post-increment load instructions are being generated.
 ; CHECK: r{{[0-9]+}}{{ *}}={{ *}}memw(r{{[0-9]+}}{{ *}}++{{ *}}#4{{ *}})
diff --git a/test/CodeGen/NVPTX/tuple-literal.ll b/test/CodeGen/NVPTX/tuple-literal.ll
new file mode 100644
index 0000000000..5c0cb2c15c
--- /dev/null
+++ b/test/CodeGen/NVPTX/tuple-literal.ll
@@ -0,0 +1,5 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_13
+
+define ptx_device void @test_function({i8, i8}*) {
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/vector-compare.ll b/test/CodeGen/NVPTX/vector-compare.ll
new file mode 100644
index 0000000000..2180499952
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-compare.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20
+
+; This test makes sure that the result of vector compares are properly
+; scalarized.  If codegen fails, then the type legalizer incorrectly
+; tried to promote <2 x i1> to <2 x i8> and instruction selection failed.
+
+define void @foo(<2 x i32>* %a, <2 x i32>* %b, i32* %r1, i32* %r2) {
+  %aval = load <2 x i32>* %a
+  %bval = load <2 x i32>* %b
+  %res = icmp slt <2 x i32> %aval, %bval
+  %t1 = extractelement <2 x i1> %res, i32 0
+  %t2 = extractelement <2 x i1> %res, i32 1
+  %t1a = zext i1 %t1 to i32
+  %t2a = zext i1 %t2 to i32
+  store i32 %t1a, i32* %r1
+  store i32 %t2a, i32* %r2
+  ret void
+}
diff --git a/test/CodeGen/NVPTX/vector-select.ll b/test/CodeGen/NVPTX/vector-select.ll
new file mode 100644
index 0000000000..11893df103
--- /dev/null
+++ b/test/CodeGen/NVPTX/vector-select.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20
+
+; This test makes sure that vector selects are scalarized by the type legalizer.
+; If not, type legalization will fail.
+
+define void @foo(<2 x i32> addrspace(1)* %def_a, <2 x i32> addrspace(1)* %def_b, <2 x i32> addrspace(1)* %def_c) {
+entry:
+  %tmp4 = load <2 x i32> addrspace(1)* %def_a
+  %tmp6 = load <2 x i32> addrspace(1)* %def_c
+  %tmp8 = load <2 x i32> addrspace(1)* %def_b
+  %0 = icmp sge <2 x i32> %tmp4, zeroinitializer
+  %cond = select <2 x i1> %0, <2 x i32> %tmp6, <2 x i32> %tmp8
+  store <2 x i32> %cond, <2 x i32> addrspace(1)* %def_c
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/buildvec_canonicalize.ll b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
index 0454c584bc..e155a35c4d 100644
--- a/test/CodeGen/PowerPC/buildvec_canonicalize.ll
+++ b/test/CodeGen/PowerPC/buildvec_canonicalize.ll
@@ -1,10 +1,4 @@
-; There should be exactly one vxor here.
-; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
-; RUN:   grep vxor | count 1
-
-; There should be exactly one vsplti here.
-; RUN: llc < %s -march=ppc32 -mcpu=g5 --enable-unsafe-fp-math | \
-; RUN:   grep vsplti | count 1
+; RUN: llc < %s -march=ppc32 -mattr=+altivec --enable-unsafe-fp-math | FileCheck %s
 
 define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
         %tmp = load <4 x float>* %P3            ; <<4 x float>> [#uses=1]
@@ -15,10 +9,16 @@ define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) {
         store <4 x i32> zeroinitializer, <4 x i32>* %P2
         ret void
 }
+; The fmul will spill a vspltisw to create a -0.0 vector used as the addend
+; to vmaddfp (so it would IEEE compliant with zero sign propagation).
+; CHECK: @VXOR
+; CHECK: vsplti
+; CHECK: vxor
 
 define void @VSPLTI(<4 x i32>* %P2, <8 x i16>* %P3) {
         store <4 x i32> bitcast (<16 x i8> < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > to <4 x i32>), <4 x i32>* %P2
         store <8 x i16> < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >, <8 x i16>* %P3
         ret void
 }
-
+; CHECK: @VSPLTI
+; CHECK: vsplti
diff --git a/test/CodeGen/PowerPC/mcm-1.ll b/test/CodeGen/PowerPC/mcm-1.ll
new file mode 100644
index 0000000000..62fe88c2b8
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-1.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+
+; Test correct code generation for medium code model (32-bit TOC offsets)
+; for loading and storing an external variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ei = external global i32
+
+define signext i32 @test_external() nounwind {
+entry:
+  %0 = load i32* @ei, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @ei, align 4
+  ret i32 %0
+}
+
+; CHECK: test_external:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-2.ll b/test/CodeGen/PowerPC/mcm-2.ll
new file mode 100644
index 0000000000..45df0ab14f
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-2.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+
+; Test correct code generation for medium code model (32-bit TOC offsets)
+; for loading and storing a static variable scoped to a function.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+  %0 = load i32* @test_fn_static.si, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @test_fn_static.si, align 4
+  ret i32 %0
+}
+
+; CHECK: test_fn_static:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .type [[VAR]],@object
+; CHECK: .local [[VAR]]
+; CHECK: .comm [[VAR]],4,4
diff --git a/test/CodeGen/PowerPC/mcm-3.ll b/test/CodeGen/PowerPC/mcm-3.ll
new file mode 100644
index 0000000000..0e7bbe798b
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-3.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+
+; Test correct code generation for medium code model (32-bit TOC offsets)
+; for loading and storing a file-scope static variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+  %0 = load i32* @gi, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @gi, align 4
+  ret i32 %0
+}
+
+; CHECK: test_file_static:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR:[a-z0-9A-Z_.]+]]@toc@ha
+; CHECK: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .type [[VAR]],@object
+; CHECK: .data
+; CHECK: .globl [[VAR]]
+; CHECK: [[VAR]]:
+; CHECK: .long 5
diff --git a/test/CodeGen/PowerPC/mcm-4.ll b/test/CodeGen/PowerPC/mcm-4.ll
new file mode 100644
index 0000000000..db36d0bcf7
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-4.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+
+; Test correct code generation for medium code model (32-bit TOC offsets)
+; for loading a value from the constant pool (TOC-relative).
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define double @test_double_const() nounwind {
+entry:
+  ret double 0x3F4FD4920B498CF0
+}
+
+; CHECK: [[VAR:[a-z0-9A-Z_.]+]]:
+; CHECK: .quad 4562098671269285104
+; CHECK: test_double_const:
+; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha
+; CHECK: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l
+; CHECK: lfd {{[0-9]+}}, 0([[REG2]])
diff --git a/test/CodeGen/PowerPC/mcm-5.ll b/test/CodeGen/PowerPC/mcm-5.ll
new file mode 100644
index 0000000000..10d89f5215
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-5.ll
@@ -0,0 +1,59 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium <%s | FileCheck %s
+
+; Test correct code generation for medium code model (32-bit TOC offsets)
+; for loading the address of a jump table from the TOC.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define signext i32 @test_jump_table(i32 signext %i) nounwind {
+entry:
+  %i.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32* %i.addr, align 4
+  switch i32 %0, label %sw.default [
+    i32 3, label %sw.bb
+    i32 4, label %sw.bb1
+    i32 5, label %sw.bb2
+    i32 6, label %sw.bb3
+  ]
+
+sw.default:                                       ; preds = %entry
+  br label %sw.epilog
+
+sw.bb:                                            ; preds = %entry
+  %1 = load i32* %i.addr, align 4
+  %mul = mul nsw i32 %1, 7
+  store i32 %mul, i32* %i.addr, align 4
+  br label %sw.bb1
+
+sw.bb1:                                           ; preds = %entry, %sw.bb
+  %2 = load i32* %i.addr, align 4
+  %dec = add nsw i32 %2, -1
+  store i32 %dec, i32* %i.addr, align 4
+  br label %sw.bb2
+
+sw.bb2:                                           ; preds = %entry, %sw.bb1
+  %3 = load i32* %i.addr, align 4
+  %add = add nsw i32 %3, 3
+  store i32 %add, i32* %i.addr, align 4
+  br label %sw.bb3
+
+sw.bb3:                                           ; preds = %entry, %sw.bb2
+  %4 = load i32* %i.addr, align 4
+  %shl = shl i32 %4, 1
+  store i32 %shl, i32* %i.addr, align 4
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %sw.bb3, %sw.default
+  %5 = load i32* %i.addr, align 4
+  ret i32 %5
+}
+
+; CHECK: test_jump_table:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: ldx {{[0-9]+}}, {{[0-9]+}}, [[REG2]]
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-6.ll b/test/CodeGen/PowerPC/mcm-6.ll
new file mode 100644
index 0000000000..0a7fa762d4
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-6.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s
+
+; Test correct code generation for medium code model (32-bit TOC offsets)
+; for loading and storing a tentatively defined variable.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ti = common global i32 0, align 4
+
+define signext i32 @test_tentative() nounwind {
+entry:
+  %0 = load i32* @ti, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @ti, align 4
+  ret i32 %0
+}
+
+; CHECK: test_tentative:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc [[VAR:[a-z0-9A-Z_.]+]][TC],{{[a-z0-9A-Z_.]+}}
+; CHECK: .comm [[VAR]],4,4
diff --git a/test/CodeGen/PowerPC/mcm-7.ll b/test/CodeGen/PowerPC/mcm-7.ll
new file mode 100644
index 0000000000..0e9fa2b38b
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-7.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mcpu=pwr7 -O0 -code-model=medium < %s | FileCheck %s
+
+; Test correct code generation for medium code model (32-bit TOC offsets)
+; for loading a function address.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i8* @test_fnaddr() nounwind {
+entry:
+  %func = alloca i32 (i32)*, align 8
+  store i32 (i32)* @foo, i32 (i32)** %func, align 8
+  %0 = load i32 (i32)** %func, align 8
+  %1 = bitcast i32 (i32)* %0 to i8*
+  ret i8* %1
+}
+
+declare signext i32 @foo(i32 signext)
+
+; CHECK: test_fnaddr:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-default.ll b/test/CodeGen/PowerPC/mcm-default.ll
new file mode 100644
index 0000000000..19de2536ae
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-default.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mcpu=pwr7 -O0 <%s | FileCheck %s
+
+; Test that we generate code for the medium model as the default.
+; Use an external variable reference as an example.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ei = external global i32
+
+define signext i32 @test_external() nounwind {
+entry:
+  %0 = load i32* @ei, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @ei, align 4
+  ret i32 %0
+}
+
+; CHECK: test_external:
+; CHECK: addis [[REG1:[0-9]+]], 2, .LC[[TOCNUM:[0-9]+]]@toc@ha
+; CHECK: ld [[REG2:[0-9]+]], .LC[[TOCNUM]]@toc@l([[REG1]])
+; CHECK: lwz {{[0-9]+}}, 0([[REG2]])
+; CHECK: stw {{[0-9]+}}, 0([[REG2]])
+; CHECK: .section .toc
+; CHECK: .LC[[TOCNUM]]:
+; CHECK: .tc {{[a-z0-9A-Z_.]+}}[TC],{{[a-z0-9A-Z_.]+}}
diff --git a/test/CodeGen/PowerPC/mcm-obj.ll b/test/CodeGen/PowerPC/mcm-obj.ll
new file mode 100644
index 0000000000..ec1b7b0084
--- /dev/null
+++ b/test/CodeGen/PowerPC/mcm-obj.ll
@@ -0,0 +1,193 @@
+; RUN: llc -O0 -mcpu=pwr7 -code-model=medium -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; FIXME: When asm-parse is available, could make this an assembly test.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@ei = external global i32
+
+define signext i32 @test_external() nounwind {
+entry:
+  %0 = load i32* @ei, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @ei, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing external variable ei.
+;
+; CHECK:       '.rela.text'
+; CHECK:       Relocation 0
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 1
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
+; CHECK-NEXT:  'r_type', 0x00000040
+
+@test_fn_static.si = internal global i32 0, align 4
+
+define signext i32 @test_fn_static() nounwind {
+entry:
+  %0 = load i32* @test_fn_static.si, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @test_fn_static.si, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing function-scoped variable si.
+;
+; CHECK:       Relocation 2
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM2:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 3
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM2]]
+; CHECK-NEXT:  'r_type', 0x00000030
+
+@gi = global i32 5, align 4
+
+define signext i32 @test_file_static() nounwind {
+entry:
+  %0 = load i32* @gi, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @gi, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing file-scope variable gi.
+;
+; CHECK:       Relocation 4
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM3:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 5
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM3]]
+; CHECK-NEXT:  'r_type', 0x00000030
+
+define double @test_double_const() nounwind {
+entry:
+  ret double 0x3F4FD4920B498CF0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO for
+; accessing a constant.
+;
+; CHECK:       Relocation 6
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM4:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 7
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM4]]
+; CHECK-NEXT:  'r_type', 0x00000030
+
+define signext i32 @test_jump_table(i32 signext %i) nounwind {
+entry:
+  %i.addr = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  %0 = load i32* %i.addr, align 4
+  switch i32 %0, label %sw.default [
+    i32 3, label %sw.bb
+    i32 4, label %sw.bb1
+    i32 5, label %sw.bb2
+    i32 6, label %sw.bb3
+  ]
+
+sw.default:                                       ; preds = %entry
+  br label %sw.epilog
+
+sw.bb:                                            ; preds = %entry
+  %1 = load i32* %i.addr, align 4
+  %mul = mul nsw i32 %1, 7
+  store i32 %mul, i32* %i.addr, align 4
+  br label %sw.bb1
+
+sw.bb1:                                           ; preds = %entry, %sw.bb
+  %2 = load i32* %i.addr, align 4
+  %dec = add nsw i32 %2, -1
+  store i32 %dec, i32* %i.addr, align 4
+  br label %sw.bb2
+
+sw.bb2:                                           ; preds = %entry, %sw.bb1
+  %3 = load i32* %i.addr, align 4
+  %add = add nsw i32 %3, 3
+  store i32 %add, i32* %i.addr, align 4
+  br label %sw.bb3
+
+sw.bb3:                                           ; preds = %entry, %sw.bb2
+  %4 = load i32* %i.addr, align 4
+  %shl = shl i32 %4, 1
+  store i32 %shl, i32* %i.addr, align 4
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %sw.bb3, %sw.default
+  %5 = load i32* %i.addr, align 4
+  ret i32 %5
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing a jump table address.
+;
+; CHECK:       Relocation 8
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM5:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 9
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM5]]
+; CHECK-NEXT:  'r_type', 0x00000040
+
+@ti = common global i32 0, align 4
+
+define signext i32 @test_tentative() nounwind {
+entry:
+  %0 = load i32* @ti, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @ti, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing tentatively declared variable ti.
+;
+; CHECK:       Relocation 10
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM6:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 11
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM6]]
+; CHECK-NEXT:  'r_type', 0x00000040
+
+define i8* @test_fnaddr() nounwind {
+entry:
+  %func = alloca i32 (i32)*, align 8
+  store i32 (i32)* @foo, i32 (i32)** %func, align 8
+  %0 = load i32 (i32)** %func, align 8
+  %1 = bitcast i32 (i32)* %0 to i8*
+  ret i8* %1
+}
+
+declare signext i32 @foo(i32 signext)
+
+; Verify generation of R_PPC64_TOC16_HA and R_PPC64_TOC16_LO_DS for
+; accessing function address foo.
+;
+; CHECK:       Relocation 12
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM7:[0-9]+]]
+; CHECK-NEXT:  'r_type', 0x00000032
+; CHECK:       Relocation 13
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM7]]
+; CHECK-NEXT:  'r_type', 0x00000040
+
diff --git a/test/CodeGen/PowerPC/s000-alias-misched.ll b/test/CodeGen/PowerPC/s000-alias-misched.ll
new file mode 100644
index 0000000000..d03ee8738e
--- /dev/null
+++ b/test/CodeGen/PowerPC/s000-alias-misched.ll
@@ -0,0 +1,101 @@
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+; RUN: llc < %s -enable-misched -march=ppc64 -mcpu=a2 | FileCheck %s
+; RUN: llc < %s -enable-misched -enable-aa-sched-mi -march=ppc64 -mcpu=a2 | FileCheck %s
+
+@aa = external global [256 x [256 x double]], align 32
+@bb = external global [256 x [256 x double]], align 32
+@cc = external global [256 x [256 x double]], align 32
+@.str1 = external hidden unnamed_addr constant [6 x i8], align 1
+@X = external global [16000 x double], align 32
+@Y = external global [16000 x double], align 32
+@Z = external global [16000 x double], align 32
+@U = external global [16000 x double], align 32
+@V = external global [16000 x double], align 32
+@.str137 = external hidden unnamed_addr constant [14 x i8], align 1
+
+declare void @check(i32 signext) nounwind
+
+declare signext i32 @printf(i8* nocapture, ...) nounwind
+
+declare signext i32 @init(i8*) nounwind
+
+define signext i32 @s000() nounwind {
+entry:
+  %call = tail call signext i32 @init(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0))
+  %call1 = tail call i64 @clock() nounwind
+  br label %for.cond2.preheader
+
+; CHECK: @s000
+
+for.cond2.preheader:                              ; preds = %for.end, %entry
+  %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ]
+  br label %for.body4
+
+for.body4:                                        ; preds = %for.body4, %for.cond2.preheader
+  %indvars.iv = phi i64 [ 0, %for.cond2.preheader ], [ %indvars.iv.next.15, %for.body4 ]
+  %arrayidx = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv
+  %arrayidx6 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv
+  %0 = bitcast double* %arrayidx to <1 x double>*
+  %1 = load <1 x double>* %0, align 32, !tbaa !0
+  %add = fadd <1 x double> %1, <double 1.000000e+00>
+  %2 = bitcast double* %arrayidx6 to <1 x double>*
+  store <1 x double> %add, <1 x double>* %2, align 32, !tbaa !0
+  %indvars.iv.next.322 = or i64 %indvars.iv, 4
+  %arrayidx.4 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.322
+  %arrayidx6.4 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.322
+  %3 = bitcast double* %arrayidx.4 to <1 x double>*
+  %4 = load <1 x double>* %3, align 32, !tbaa !0
+  %add.4 = fadd <1 x double> %4, <double 1.000000e+00>
+  %5 = bitcast double* %arrayidx6.4 to <1 x double>*
+  store <1 x double> %add.4, <1 x double>* %5, align 32, !tbaa !0
+  %indvars.iv.next.726 = or i64 %indvars.iv, 8
+  %arrayidx.8 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.726
+  %arrayidx6.8 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.726
+  %6 = bitcast double* %arrayidx.8 to <1 x double>*
+  %7 = load <1 x double>* %6, align 32, !tbaa !0
+  %add.8 = fadd <1 x double> %7, <double 1.000000e+00>
+  %8 = bitcast double* %arrayidx6.8 to <1 x double>*
+  store <1 x double> %add.8, <1 x double>* %8, align 32, !tbaa !0
+  %indvars.iv.next.1130 = or i64 %indvars.iv, 12
+  %arrayidx.12 = getelementptr inbounds [16000 x double]* @Y, i64 0, i64 %indvars.iv.next.1130
+  %arrayidx6.12 = getelementptr inbounds [16000 x double]* @X, i64 0, i64 %indvars.iv.next.1130
+  %9 = bitcast double* %arrayidx.12 to <1 x double>*
+  %10 = load <1 x double>* %9, align 32, !tbaa !0
+  %add.12 = fadd <1 x double> %10, <double 1.000000e+00>
+  %11 = bitcast double* %arrayidx6.12 to <1 x double>*
+  store <1 x double> %add.12, <1 x double>* %11, align 32, !tbaa !0
+  %indvars.iv.next.15 = add i64 %indvars.iv, 16
+  %lftr.wideiv.15 = trunc i64 %indvars.iv.next.15 to i32
+  %exitcond.15 = icmp eq i32 %lftr.wideiv.15, 16000
+  br i1 %exitcond.15, label %for.end, label %for.body4
+
+; All of the loads should come before all of the stores.
+; CHECK: mtctr
+; CHECK: stfd
+; CHECK-NOT: lfd
+; CHECK: bdnz
+
+for.end:                                          ; preds = %for.body4
+  %call7 = tail call signext i32 @dummy(double* getelementptr inbounds ([16000 x double]* @X, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Y, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @Z, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @U, i64 0, i64 0), double* getelementptr inbounds ([16000 x double]* @V, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @aa, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @bb, i64 0, i64 0), [256 x double]* getelementptr inbounds ([256 x [256 x double]]* @cc, i64 0, i64 0), double 0.000000e+00) nounwind
+  %inc9 = add nsw i32 %nl.018, 1
+  %exitcond = icmp eq i32 %inc9, 400000
+  br i1 %exitcond, label %for.end10, label %for.cond2.preheader
+
+for.end10:                                        ; preds = %for.end
+  %call11 = tail call i64 @clock() nounwind
+  %sub = sub nsw i64 %call11, %call1
+  %conv = sitofp i64 %sub to double
+  %div = fdiv double %conv, 1.000000e+06
+  %call12 = tail call signext i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([14 x i8]* @.str137, i64 0, i64 0), double %div) nounwind
+  tail call void @check(i32 signext 1)
+  ret i32 0
+}
+
+declare i64 @clock() nounwind
+
+declare signext i32 @dummy(double*, double*, double*, double*, double*, [256 x double]*, [256 x double]*, [256 x double]*, double)
+
+!0 = metadata !{metadata !"double", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/PowerPC/tls-ie-obj.ll b/test/CodeGen/PowerPC/tls-ie-obj.ll
new file mode 100644
index 0000000000..5cc0b187f6
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ie-obj.ll
@@ -0,0 +1,32 @@
+; RUN: llc -mcpu=pwr7 -O0 -filetype=obj %s -o - | \
+; RUN: elf-dump --dump-section-data | FileCheck %s
+
+; Test correct relocation generation for thread-local storage
+; using the initial-exec model and integrated assembly.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = external thread_local global i32
+
+define signext i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
+
+; Verify generation of R_PPC64_GOT_TPREL16_DS and R_PPC64_TLS for
+; accessing external variable a.
+;
+; CHECK:       '.rela.text'
+; CHECK:       Relocation 0
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1:[0-9a-f]+]]
+; CHECK-NEXT:  'r_type', 0x00000057
+; CHECK:       Relocation 1
+; CHECK-NEXT:  'r_offset'
+; CHECK-NEXT:  'r_sym', 0x[[SYM1]]
+; CHECK-NEXT:  'r_type', 0x00000043
+
diff --git a/test/CodeGen/PowerPC/tls-ie.ll b/test/CodeGen/PowerPC/tls-ie.ll
new file mode 100644
index 0000000000..cc6f084efb
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls-ie.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mcpu=pwr7 -O0 <%s | FileCheck %s
+
+; Test correct assembly code generation for thread-local storage
+; using the initial-exec model.
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+@a = external thread_local global i32
+
+define signext i32 @main() nounwind {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  %0 = load i32* @a, align 4
+  ret i32 %0
+}
+
+; CHECK: ld [[REG:[0-9]+]], a@got@tprel(2)
+; CHECK: add {{[0-9]+}}, [[REG]], a@tls
+
diff --git a/test/CodeGen/PowerPC/vec_mul.ll b/test/CodeGen/PowerPC/vec_mul.ll
index 80f4de4a17..53bc75dd10 100644
--- a/test/CodeGen/PowerPC/vec_mul.ll
+++ b/test/CodeGen/PowerPC/vec_mul.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | not grep mullw
-; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vmsumuhm
+; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -march=ppc32 -mattr=+altivec | FileCheck %s
 
 define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
 	%tmp = load <4 x i32>* %X		; <<4 x i32>> [#uses=1]
@@ -7,6 +6,9 @@ define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) {
 	%tmp3 = mul <4 x i32> %tmp, %tmp2		; <<4 x i32>> [#uses=1]
 	ret <4 x i32> %tmp3
 }
+; CHECK: test_v4i32:
+; CHECK: vmsumuhm
+; CHECK-NOT: mullw
 
 define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
 	%tmp = load <8 x i16>* %X		; <<8 x i16>> [#uses=1]
@@ -14,6 +16,9 @@ define <8 x i16> @test_v8i16(<8 x i16>* %X, <8 x i16>* %Y) {
 	%tmp3 = mul <8 x i16> %tmp, %tmp2		; <<8 x i16>> [#uses=1]
 	ret <8 x i16> %tmp3
 }
+; CHECK: test_v8i16:
+; CHECK: vmladduhm
+; CHECK-NOT: mullw
 
 define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
 	%tmp = load <16 x i8>* %X		; <<16 x i8>> [#uses=1]
@@ -21,3 +26,21 @@ define <16 x i8> @test_v16i8(<16 x i8>* %X, <16 x i8>* %Y) {
 	%tmp3 = mul <16 x i8> %tmp, %tmp2		; <<16 x i8>> [#uses=1]
 	ret <16 x i8> %tmp3
 }
+; CHECK: test_v16i8:
+; CHECK: vmuloub
+; CHECK: vmuleub
+; CHECK-NOT: mullw
+
+define <4 x float> @test_float(<4 x float>* %X, <4 x float>* %Y) {
+	%tmp = load <4 x float>* %X
+	%tmp2 = load <4 x float>* %Y
+	%tmp3 = fmul <4 x float> %tmp, %tmp2
+	ret <4 x float> %tmp3
+}
+; Check the creation of a negative zero float vector by creating a vector of
+; all bits set and shifting it 31 bits to left, resulting a an vector of 
+; 4 x 0x80000000 (-0.0 as float).
+; CHECK: test_float:
+; CHECK: vspltisw [[ZNEG:[0-9]+]], -1
+; CHECK: vslw     {{[0-9]+}}, [[ZNEG]], [[ZNEG]]
+; CHECK: vmaddfp
diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
index 9f5a677ed3..498c78165e 100644
--- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
+++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll
@@ -61,7 +61,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
 !12 = metadata !{i32 524289, metadata !4, metadata !"", metadata !4, i32 0, i64 192, i64 32, i64 0, i32 0, metadata !13, metadata !14, i32 0, null} ; [ DW_TAG_array_type ]
 !13 = metadata !{i32 524324, metadata !4, metadata !"double", metadata !4, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
 !14 = metadata !{metadata !15}
-!15 = metadata !{i32 524321, i64 0, i64 2}        ; [ DW_TAG_subrange_type ]
+!15 = metadata !{i32 524321, i64 0, i64 3}        ; [ DW_TAG_subrange_type ]
 !16 = metadata !{i32 524334, i32 0, metadata !8, metadata !"ggVector3", metadata !"ggVector3", metadata !"", metadata !9, i32 72, metadata !17, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
 !17 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !18, i32 0, null} ; [ DW_TAG_subroutine_type ]
 !18 = metadata !{null, metadata !19, metadata !20}
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
index ac059bdaf0..a8134e6308 100644
--- a/test/CodeGen/Thumb2/thumb2-mul.ll
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -15,7 +15,7 @@ entry:
 ; CHECK: t1:
 ; CHECK: mla     r0, r2, r0, r1
 ; CHECK: add.w   r0, r0, r0, lsl #3
-; CHECL: add.w   r0, r3, r0, lsl #2
+; CHECK: add.w   r0, r3, r0, lsl #2
   %mul = mul i32 %n, %i
   %add = add i32 %mul, %j
   %0 = ptrtoint %struct.CMPoint* %thePoints to i32
diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
index 0dca14d064..890fd0f067 100644
--- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
+++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll
@@ -78,7 +78,7 @@ declare void @llvm.stackrestore(i8*) nounwind
 !9 = metadata !{i32 458767, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !10} ; [ DW_TAG_pointer_type ]
 !10 = metadata !{i32 458753, metadata !2, metadata !"", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !5, metadata !11, i32 0, null} ; [ DW_TAG_array_type ]
 !11 = metadata !{metadata !12}
-!12 = metadata !{i32 458785, i64 0, i64 0}        ; [ DW_TAG_subrange_type ]
+!12 = metadata !{i32 458785, i64 0, i64 1}        ; [ DW_TAG_subrange_type ]
 !13 = metadata !{i32 3, i32 0, metadata !14, null}
 !14 = metadata !{i32 458763, metadata !1, i32 0, i32 0} ; [ DW_TAG_lexical_block ]
 !15 = metadata !{i32 4, i32 0, metadata !14, null}
diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
index 94075e78a2..c2d9d84d4c 100644
--- a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
+++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -6,15 +6,16 @@
 define void @t(i32 %count) ssp nounwind {
 entry:
 ; CHECK: t:
-; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
-; CHECK: movups L_str(%rip), %xmm0
+; CHECK: movups L_str+12(%rip), %xmm0
+; CHECK: movups L_str(%rip), %xmm1
   %tmp0 = alloca [60 x i8], align 1
   %tmp1 = getelementptr inbounds [60 x i8]* %tmp0, i64 0, i64 0
   br label %bb1
 
 bb1:
 ; CHECK: LBB0_1:
-; CHECK: movaps %xmm0, (%rsp)
+; CHECK: movups %xmm0, 12(%rsp)
+; CHECK: movaps %xmm1, (%rsp)
   %tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ]
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1, i1 false)
   %tmp3 = add i32 %tmp2, 1
diff --git a/test/CodeGen/X86/2010-07-06-DbgCrash.ll b/test/CodeGen/X86/2010-07-06-DbgCrash.ll
index edd6015b0d..208e93e098 100644
--- a/test/CodeGen/X86/2010-07-06-DbgCrash.ll
+++ b/test/CodeGen/X86/2010-07-06-DbgCrash.ll
@@ -16,7 +16,7 @@
 !103 = metadata !{i32 524299, metadata !97, i32 73, i32 0} ; [ DW_TAG_lexical_block ]
 !104 = metadata !{i32 524289, metadata !38, metadata !"", metadata !38, i32 0, i64 85312, i64 64, i64 0, i32 0, metadata !46, metadata !105, i32 0, null} ; [ DW_TAG_array_type ]
 !105 = metadata !{metadata !106}
-!106 = metadata !{i32 524321, i64 0, i64 1332}    ; [ DW_TAG_subrange_type ]
+!106 = metadata !{i32 524321, i64 0, i64 1333}    ; [ DW_TAG_subrange_type ]
 !107 = metadata !{i32 73, i32 0, metadata !103, null}
 
 define i32 @main() nounwind ssp {
diff --git a/test/CodeGen/X86/2011-11-30-or.ll b/test/CodeGen/X86/2011-11-30-or.ll
index 0a949eb29b..f66248bc5a 100644
--- a/test/CodeGen/X86/2011-11-30-or.ll
+++ b/test/CodeGen/X86/2011-11-30-or.ll
@@ -11,12 +11,12 @@ target triple = "x86_64-apple-macosx10.6.6"
 define void @select_func() {
 entry:
   %c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-  %a35 = bitcast <8 x i16> %c.lobit.i.i.i to <2 x i64>
   %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16 65, i16 25, i16 8, i16 95, i16 15, i16 45>
   %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
-  %neg.i.i.i.i = xor <2 x i64> %a35, <i64 -1, i64 -1>
-  %and.i.i.i.i = and <2 x i64> zeroinitializer, %neg.i.i.i.i
-  %or.i.i.i.i = or <2 x i64> %and.i.i.i.i, %and.i5.i.i.i
+  %neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %and.i.i.i = and <8 x i16> %neg.i.i.i.i, <i16 45, i16 15, i16 95, i16 8, i16 25, i16 65, i16 8, i16 25>
+  %and.i2.i.i.i = bitcast <8 x i16> %and.i.i.i to <2 x i64>
+  %or.i.i.i.i = or <2 x i64> %and.i2.i.i.i, %and.i5.i.i.i
   %a37 = bitcast <2 x i64> %or.i.i.i.i to <8 x i16>
   store <8 x i16> %a37, <8 x i16> addrspace(1)* undef, align 4
   ret void
diff --git a/test/CodeGen/X86/2011-12-28-vselecti8.ll b/test/CodeGen/X86/2011-12-28-vselecti8.ll
index dbc122ac6e..1a9d46d1e2 100644
--- a/test/CodeGen/X86/2011-12-28-vselecti8.ll
+++ b/test/CodeGen/X86/2011-12-28-vselecti8.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-darwin11.2.0"
 
 ; CHECK: @foo8
 ; CHECK: psll
-; CHECK: psraw
+; CHECK-NOT: psraw
 ; CHECK: pblendvb
 ; CHECK: ret
 define void @foo8(float* nocapture %RET) nounwind {
diff --git a/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll b/test/CodeGen/X86/2012-07-15-BuildVectorPromote.ll
index 6591c64d87..078f1b05c3 100644
--- a/test/CodeGen/Generic/2012-07-15-BuildVectorPromote.ll
+++ b/test/CodeGen/X86/2012-07-15-BuildVectorPromote.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mcpu=corei7 < %s
+; RUN: llc < %s -march=x86 -mcpu=corei7
 ; We don't care about the output, just that it doesn't crash
 
 define <1 x i1> @buildvec_promote() {
diff --git a/test/CodeGen/X86/2012-07-15-broadcastfold.ll b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
index 3b7a8a7b87..2c7dfc8dfd 100644
--- a/test/CodeGen/X86/2012-07-15-broadcastfold.ll
+++ b/test/CodeGen/X86/2012-07-15-broadcastfold.ll
@@ -3,7 +3,7 @@
 declare x86_fastcallcc i64 @barrier()
 
 ;CHECK: bcast_fold
-;CHECK: vmovaps %xmm{{[0-9]+}}, [[SPILLED:[^\)]+\)]]
+;CHECK: vmov{{[au]}}ps %xmm{{[0-9]+}}, [[SPILLED:[^\)]+\)]]
 ;CHECK: barrier
 ;CHECK: vbroadcastss [[SPILLED]], %ymm0
 ;CHECK: ret
diff --git a/test/CodeGen/X86/2012-11-28-merge-store-alias.ll b/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
new file mode 100644
index 0000000000..756e86e0f8
--- /dev/null
+++ b/test/CodeGen/X86/2012-11-28-merge-store-alias.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win64 | FileCheck %s
+
+; CHECK: merge_stores_can
+; CHECK: callq foo
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: movups  %xmm0
+; CHECK: callq foo
+; CHECK: ret
+declare i32 @foo([10 x i32]* )
+
+define i32 @merge_stores_can() nounwind ssp {
+  %object1 = alloca [10 x i32]
+
+  %ret0 = call i32 @foo([10 x i32]* %object1) nounwind
+
+  %O1_1 = getelementptr [10 x i32]* %object1, i64 0, i32 1
+  %O1_2 = getelementptr [10 x i32]* %object1, i64 0, i32 2
+  %O1_3 = getelementptr [10 x i32]* %object1, i64 0, i32 3
+  %O1_4 = getelementptr [10 x i32]* %object1, i64 0, i32 4
+  %ld_ptr = getelementptr [10 x i32]* %object1, i64 0, i32 9
+
+  store i32 0, i32* %O1_1
+  store i32 0, i32* %O1_2
+  %ret = load  i32* %ld_ptr  ; <--- does not alias.
+  store i32 0, i32* %O1_3
+  store i32 0, i32* %O1_4
+
+  %ret1 = call i32 @foo([10 x i32]* %object1) nounwind
+
+  ret i32 %ret
+}
+
+; CHECK: merge_stores_cant
+; CHECK-NOT: xorps %xmm0, %xmm0
+; CHECK-NOT: movups  %xmm0
+; CHECK: ret
+define i32 @merge_stores_cant([10 x i32]* %in0, [10 x i32]* %in1) nounwind ssp {
+
+  %O1_1 = getelementptr [10 x i32]* %in1, i64 0, i32 1
+  %O1_2 = getelementptr [10 x i32]* %in1, i64 0, i32 2
+  %O1_3 = getelementptr [10 x i32]* %in1, i64 0, i32 3
+  %O1_4 = getelementptr [10 x i32]* %in1, i64 0, i32 4
+  %ld_ptr = getelementptr [10 x i32]* %in0, i64 0, i32 2
+
+  store i32 0, i32* %O1_1
+  store i32 0, i32* %O1_2
+  %ret = load  i32* %ld_ptr  ;  <--- may alias
+  store i32 0, i32* %O1_3
+  store i32 0, i32* %O1_4
+
+  ret i32 %ret
+}
diff --git a/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
new file mode 100644
index 0000000000..f149e4a11e
--- /dev/null
+++ b/test/CodeGen/X86/2012-11-30-handlemove-dbg.ll
@@ -0,0 +1,50 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -enable-misched \
+; RUN:          -verify-machineinstrs | FileCheck %s
+;
+; Test LiveInterval update handling of DBG_VALUE.
+; rdar://12777252.
+;
+; CHECK: %entry
+; CHECK: DEBUG_VALUE: hg
+; CHECK: je
+
+%struct.node.0.27 = type { i16, double, [3 x double], i32, i32 }
+%struct.hgstruct.2.29 = type { %struct.bnode.1.28*, [3 x double], double, [3 x double] }
+%struct.bnode.1.28 = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode.1.28*, %struct.bnode.1.28* }
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define signext i16 @subdivp(%struct.node.0.27* nocapture %p, double %dsq, double %tolsq, %struct.hgstruct.2.29* nocapture byval align 8 %hg) nounwind uwtable readonly ssp {
+entry:
+  call void @llvm.dbg.declare(metadata !{%struct.hgstruct.2.29* %hg}, metadata !4)
+  %type = getelementptr inbounds %struct.node.0.27* %p, i64 0, i32 0
+  %0 = load i16* %type, align 2, !tbaa !8
+  %cmp = icmp eq i16 %0, 1
+  br i1 %cmp, label %return, label %for.cond.preheader
+
+for.cond.preheader:                               ; preds = %entry
+  %arrayidx6.1 = getelementptr inbounds %struct.hgstruct.2.29* %hg, i64 0, i32 1, i64 1
+  %cmp22 = fcmp olt double 0.000000e+00, %dsq
+  %conv24 = zext i1 %cmp22 to i16
+  br label %return
+
+return:                                           ; preds = %for.cond.preheader, %entry
+  %retval.0 = phi i16 [ %conv24, %for.cond.preheader ], [ 0, %entry ]
+  ret i16 %retval.0
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh", metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Olden/bh/newbh.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{null}
+!4 = metadata !{i32 786689, null, metadata !"hg", metadata !5, i32 67109589, metadata !6, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [hg] [line 725]
+!5 = metadata !{i32 786473, metadata !"MultiSource/Benchmarks/Olden/bh/newbh.c", metadata !"MultiSource/Benchmarks/Olden/bh", null} ; [ DW_TAG_file_type ]
+!6 = metadata !{i32 786454, null, metadata !"hgstruct", metadata !5, i32 492, i64 0, i64 0, i64 0, i32 0, metadata !7} ; [ DW_TAG_typedef ] [hgstruct] [line 492, size 0, align 0, offset 0] [from ]
+!7 = metadata !{i32 786451, null, metadata !"", metadata !5, i32 487, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [line 487, size 512, align 64, offset 0] [from ]
+!8 = metadata !{metadata !"short", metadata !9}
+!9 = metadata !{metadata !"omnipotent char", metadata !10}
+!10 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/2012-11-30-misched-dbg.ll b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
new file mode 100644
index 0000000000..f171c16df3
--- /dev/null
+++ b/test/CodeGen/X86/2012-11-30-misched-dbg.ll
@@ -0,0 +1,134 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -enable-misched \
+; RUN:          -verify-machineinstrs | FileCheck %s
+;
+; Test MachineScheduler handling of DBG_VALUE.
+; rdar://12776937.
+;
+; CHECK: %if.else581
+; CHECK: DEBUG_VALUE: num1
+; CHECK: call
+
+%union.rec = type {}
+
+@.str15 = external hidden unnamed_addr constant [6 x i8], align 1
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define i32 @AttachGalley(%union.rec** nocapture %suspend_pt) nounwind uwtable ssp {
+entry:
+  %num14075 = alloca [20 x i8], align 16
+  br label %if.end33
+
+if.end33:                                         ; preds = %entry
+  %cmp1733 = icmp eq i32 undef, 0
+  br label %if.else581
+
+if.else581:                                       ; preds = %if.end33
+  %cmp586 = icmp eq i8 undef, -123
+  br i1 %cmp586, label %if.then588, label %if.else594
+
+if.then588:                                       ; preds = %if.else581
+  br label %for.cond1710.preheader
+
+if.else594:                                       ; preds = %if.else581
+  unreachable
+
+for.cond1710.preheader:                           ; preds = %if.then588
+  br label %for.cond1710
+
+for.cond1710:                                     ; preds = %for.cond1710, %for.cond1710.preheader
+  br i1 undef, label %for.cond1710, label %if.then3344
+
+if.then3344:
+  br label %if.then4073
+
+if.then4073:                                      ; preds = %if.then3344
+  call void @llvm.dbg.declare(metadata !{[20 x i8]* %num14075}, metadata !4)
+  %arraydecay4078 = getelementptr inbounds [20 x i8]* %num14075, i64 0, i64 0
+  %0 = load i32* undef, align 4
+  %add4093 = add nsw i32 %0, 0
+  %conv4094 = sitofp i32 %add4093 to float
+  %div4095 = fdiv float %conv4094, 5.670000e+02
+  %conv4096 = fpext float %div4095 to double
+  %call4097 = call i32 (i8*, i32, i64, i8*, ...)* @__sprintf_chk(i8* %arraydecay4078, i32 0, i64 20, i8* getelementptr inbounds ([6 x i8]* @.str15, i64 0, i64 0), double %conv4096) nounwind
+  br i1 %cmp1733, label %if.then4107, label %if.else4114
+
+if.then4107:                                      ; preds = %if.then4073
+  unreachable
+
+if.else4114:                                      ; preds = %if.then4073
+  unreachable
+}
+
+declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...)
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset", metadata !"clang version 3.3 (trunk 168918) (llvm/trunk 168920)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/MiBench/consumer-typeset/MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{}
+!4 = metadata !{i32 786688, metadata !5, metadata !"num1", metadata !14, i32 815, metadata !15, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [num1] [line 815]
+!5 = metadata !{i32 786443, metadata !6, i32 815, i32 0, metadata !14, i32 177} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!6 = metadata !{i32 786443, metadata !7, i32 812, i32 0, metadata !14, i32 176} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!7 = metadata !{i32 786443, metadata !8, i32 807, i32 0, metadata !14, i32 175} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!8 = metadata !{i32 786443, metadata !9, i32 440, i32 0, metadata !14, i32 94} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!9 = metadata !{i32 786443, metadata !10, i32 435, i32 0, metadata !14, i32 91} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!10 = metadata !{i32 786443, metadata !11, i32 434, i32 0, metadata !14, i32 90} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!11 = metadata !{i32 786443, metadata !12, i32 250, i32 0, metadata !14, i32 24} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!12 = metadata !{i32 786443, metadata !13, i32 249, i32 0, metadata !14, i32 23} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!13 = metadata !{i32 786443, metadata !3, i32 221, i32 0, metadata !14, i32 19} ; [ DW_TAG_lexical_block ] [MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c]
+!14 = metadata !{i32 786473, metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset/z19.c", metadata !"MultiSource/Benchmarks/MiBench/consumer-typeset", null} ; [ DW_TAG_file_type ]
+!15 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 160, i64 8, i32 0, i32 0, metadata !16, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 160, align 8, offset 0] [from char]
+!16 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] [char] [line 0, size 8, align 8, offset 0, enc DW_ATE_signed_char]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786465, i64 0, i64 20}       ; [ DW_TAG_subrange_type ] [0, 19]
+
+; Test DebugValue uses visited by RegisterPressureTracker findUseBetween().
+;
+; CHECK: @main
+; CHECK: DEBUG_VALUE: X
+; CHECK: call
+
+%"class.__gnu_cxx::hash_map" = type { %"class.__gnu_cxx::hashtable" }
+%"class.__gnu_cxx::hashtable" = type { i64, i64, i64, i64, i64, i64 }
+
+define void @main() uwtable ssp {
+entry:
+  %X = alloca %"class.__gnu_cxx::hash_map", align 8
+  br i1 undef, label %cond.true, label %cond.end
+
+cond.true:                                        ; preds = %entry
+  unreachable
+
+cond.end:                                         ; preds = %entry
+  call void @llvm.dbg.declare(metadata !{%"class.__gnu_cxx::hash_map"* %X}, metadata !21)
+  %_M_num_elements.i.i.i.i = getelementptr inbounds %"class.__gnu_cxx::hash_map"* %X, i64 0, i32 0, i32 5
+  invoke void @_Znwm()
+          to label %exit.i unwind label %lpad2.i.i.i.i
+
+exit.i:                                           ; preds = %cond.end
+  unreachable
+
+lpad2.i.i.i.i:                                    ; preds = %cond.end
+  %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+          cleanup
+  br i1 undef, label %lpad.body.i.i, label %if.then.i.i.i.i.i.i.i.i
+
+if.then.i.i.i.i.i.i.i.i:                          ; preds = %lpad2.i.i.i.i
+  unreachable
+
+lpad.body.i.i:                                    ; preds = %lpad2.i.i.i.i
+  resume { i8*, i32 } %0
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_Znwm()
+
+!llvm.dbg.cu = !{!20}
+
+!20 = metadata !{i32 786449, i32 0, i32 4, metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++", metadata !"clang version 3.3 (trunk 169129) (llvm/trunk 169135)", i1 true, i1 true, metadata !"", i32 0, null, null, null, null} ; [ DW_TAG_compile_unit ] [SingleSource/Benchmarks/Shootout-C++/hash.cpp] [DW_LANG_C_plus_plus]
+!21 = metadata !{i32 786688, null, metadata !"X", null, i32 29, metadata !22, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [X] [line 29]
+!22 = metadata !{i32 786454, null, metadata !"HM", metadata !23, i32 28, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_typedef ] [HM] [line 28, size 0, align 0, offset 0] [from ]
+!23 = metadata !{i32 786473, metadata !"SingleSource/Benchmarks/Shootout-C++/hash.cpp", metadata !"SingleSource/Benchmarks/Shootout-C++", null} ; [ DW_TAG_file_type ]
diff --git a/test/CodeGen/X86/2012-11-30-regpres-dbg.ll b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
new file mode 100644
index 0000000000..d290d514cc
--- /dev/null
+++ b/test/CodeGen/X86/2012-11-30-regpres-dbg.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -enable-misched \
+; RUN:          -verify-machineinstrs | FileCheck %s
+;
+; Test RegisterPressure handling of DBG_VALUE.
+;
+; CHECK: %entry
+; CHECK: DEBUG_VALUE: callback
+; CHECK: ret
+
+%struct.btCompoundLeafCallback = type { i32, i32 }
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+define void @test() unnamed_addr uwtable ssp align 2 {
+entry:
+  %callback = alloca %struct.btCompoundLeafCallback, align 8
+  br i1 undef, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  unreachable
+
+if.end:                                           ; preds = %entry
+  call void @llvm.dbg.declare(metadata !{%struct.btCompoundLeafCallback* %callback}, metadata !3)
+  %m = getelementptr inbounds %struct.btCompoundLeafCallback* %callback, i64 0, i32 1
+  store i32 0, i32* undef, align 8
+  %cmp12447 = icmp sgt i32 undef, 0
+  br i1 %cmp12447, label %for.body.lr.ph, label %invoke.cont44
+
+for.body.lr.ph:                                   ; preds = %if.end
+  unreachable
+
+invoke.cont44:                                    ; preds = %if.end
+  ret void
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet", metadata !"clang version 3.3 (trunk 168984) (llvm/trunk 168983)", i1 true, i1 true, metadata !"", i32 0, metadata !1, null, null, null} ; [ DW_TAG_compile_unit ] [MultiSource/Benchmarks/Bullet/MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !2}
+!2 = metadata !{null, null}
+!3 = metadata !{i32 786688, null, metadata !"callback", null, i32 214, metadata !4, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [callback] [line 214]
+!4 = metadata !{i32 786451, null, metadata !"btCompoundLeafCallback", metadata !5, i32 90, i64 512, i64 64, i32 0, i32 0, null, null, i32 0, null, null} ; [ DW_TAG_structure_type ] [btCompoundLeafCallback] [line 90, size 512, align 64, offset 0] [from ]
+!5 = metadata !{i32 786473, metadata !"MultiSource/Benchmarks/Bullet/btCompoundCollisionAlgorithm.cpp", metadata !"MultiSource/Benchmarks/Bullet", null} ; [ DW_TAG_file_type ]
diff --git a/test/CodeGen/X86/2012-12-06-python27-miscompile.ll b/test/CodeGen/X86/2012-12-06-python27-miscompile.ll
new file mode 100644
index 0000000000..d9effc92fa
--- /dev/null
+++ b/test/CodeGen/X86/2012-12-06-python27-miscompile.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7 -mtriple=i686-pc-win32 | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Make sure that we are zeroing one memory location at a time using xorl and
+; not both using XMM registers.
+
+;CHECK: @foo
+;CHECK: xorl
+;CHECK-NOT: xmm
+;CHECK: ret
+define i32 @foo (i64* %so) nounwind uwtable ssp {
+entry:
+  %used = getelementptr inbounds i64* %so, i32 3
+  store i64 0, i64* %used, align 8
+  %fill = getelementptr inbounds i64* %so, i32 2
+  %L = load i64* %fill, align 8
+  store i64 0, i64* %fill, align 8
+  %cmp28 = icmp sgt i64 %L, 0
+  %R = sext i1 %cmp28 to i32
+  ret i32 %R
+}
diff --git a/test/CodeGen/X86/2012-12-1-merge-multiple.ll b/test/CodeGen/X86/2012-12-1-merge-multiple.ll
new file mode 100644
index 0000000000..5931c3d27b
--- /dev/null
+++ b/test/CodeGen/X86/2012-12-1-merge-multiple.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mtriple=x86_64-pc-win64 | FileCheck %s
+
+; CHECK: multiple_stores_on_chain
+; CHECK: movabsq
+; CHECK: movq
+; CHECK: movabsq
+; CHECK: movq
+; CHECK: ret
+define void @multiple_stores_on_chain(i16 * %A) {
+entry:
+  %a0 = getelementptr inbounds i16* %A, i64 0
+  %a1 = getelementptr inbounds i16* %A, i64 1
+  %a2 = getelementptr inbounds i16* %A, i64 2
+  %a3 = getelementptr inbounds i16* %A, i64 3
+  %a4 = getelementptr inbounds i16* %A, i64 4
+  %a5 = getelementptr inbounds i16* %A, i64 5
+  %a6 = getelementptr inbounds i16* %A, i64 6
+  %a7 = getelementptr inbounds i16* %A, i64 7
+
+  store i16 0, i16* %a0
+  store i16 1, i16* %a1
+  store i16 2, i16* %a2
+  store i16 3, i16* %a3
+  store i16 4, i16* %a4
+  store i16 5, i16* %a5
+  store i16 6, i16* %a6
+  store i16 7, i16* %a7
+
+  ret void
+}
+
diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll
index 88ecd5a5d3..0be83f648d 100644
--- a/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -671,7 +671,9 @@ define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
   ; CHECK: test_x86_sse2_storeu_dq
   ; CHECK: movl
   ; CHECK: vmovdqu
-  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
+  ; add operation forces the execution domain.
+  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
   ret void
 }
 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
@@ -681,6 +683,7 @@ define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
   ; CHECK: test_x86_sse2_storeu_pd
   ; CHECK: movl
   ; CHECK: vmovupd
+  ; fadd operation forces the execution domain.
   %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
   call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
   ret void
@@ -2345,7 +2348,7 @@ declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind rea
 
 
 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
-  ; CHECK: vpermilps
+  ; CHECK: vpshufd
   %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
   ret <4 x float> %res
 }
diff --git a/test/CodeGen/X86/avx-shuffle.ll b/test/CodeGen/X86/avx-shuffle.ll
index 904f048d1e..65685a3224 100644
--- a/test/CodeGen/X86/avx-shuffle.ll
+++ b/test/CodeGen/X86/avx-shuffle.ll
@@ -6,7 +6,7 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
   ret <4 x float> %b
 ; CHECK: test1:
 ; CHECK: vshufps
-; CHECK: vpermilps
+; CHECK: vpshufd
 }
 
 ; rdar://10538417
@@ -106,7 +106,7 @@ define <4 x float> @test11(<4 x float> %a) nounwind  {
 
 define <4 x float> @test12(<4 x float>* %a) nounwind  {
 ; CHECK: test12
-; CHECK: vpermilps $27, (
+; CHECK: vpshufd
   %tmp0 = load <4 x float>* %a
   %tmp1 = shufflevector <4 x float> %tmp0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x float> %tmp1
diff --git a/test/CodeGen/X86/avx-splat.ll b/test/CodeGen/X86/avx-splat.ll
index 5ad75236e1..67e4b40810 100644
--- a/test/CodeGen/X86/avx-splat.ll
+++ b/test/CodeGen/X86/avx-splat.ll
@@ -84,7 +84,7 @@ define <8 x float> @funcF(i32 %val) nounwind {
   ret <8 x float> %tmp
 }
 
-; CHECK: vpermilps  $0
+; CHECK: vpshufd  $0
 ; CHECK-NEXT: vinsertf128  $1
 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
 entry:
@@ -93,7 +93,7 @@ entry:
 }
 
 ; CHECK: vextractf128  $1
-; CHECK-NEXT: vpermilps  $85
+; CHECK-NEXT: vpshufd
 ; CHECK-NEXT: vinsertf128  $1
 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/avx2-shuffle.ll b/test/CodeGen/X86/avx2-shuffle.ll
index a414e6880c..cf319cb7fe 100644
--- a/test/CodeGen/X86/avx2-shuffle.ll
+++ b/test/CodeGen/X86/avx2-shuffle.ll
@@ -4,15 +4,62 @@
 ; The mask for the vpblendw instruction needs to be identical for both halves
 ; of the YMM. Need to use two vpblendw instructions.
 
-; CHECK: blendw1
-; CHECK: vpblendw
-; CHECK: vpblendw
+; CHECK: vpblendw_test1
+; mask = 10010110,b = 150,d
+; CHECK: vpblendw  $150, %ymm
 ; CHECK: ret
-define <16 x i16> @blendw1(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline {
-  %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
+define <16 x i16> @vpblendw_test1(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline {
+  %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3,  i32 20, i32 5,  i32 6,  i32 23, 
+                                                               i32 8, i32 25, i32 26, i32 11, i32 28, i32 13, i32 14, i32 31>
   ret <16 x i16> %t
 }
 
+; CHECK: vpblendw_test2
+; mask1 = 00010110 = 22
+; mask2 = 10000000 = 128
+; CHECK: vpblendw  $128, %xmm
+; CHECK: vpblendw  $22, %xmm
+; CHECK: vinserti128
+; CHECK: ret
+define <16 x i16> @vpblendw_test2(<16 x i16> %a, <16 x i16> %b) nounwind alwaysinline {
+  %t = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 18, i32 3, i32 20, i32 5, i32 6, i32 7, 
+                                                               i32 8, i32 9,  i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
+  ret <16 x i16> %t
+}
+
+; CHECK: blend_test1
+; CHECK: vpblendd
+; CHECK: ret
+define <8 x i32> @blend_test1(<8 x i32> %a, <8 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 12, i32 5, i32 6, i32 7>
+  ret <8 x i32> %t
+}
+
+; CHECK: blend_test2
+; CHECK: vpblendd
+; CHECK: ret
+define <8 x i32> @blend_test2(<8 x i32> %a, <8 x i32> %b) nounwind alwaysinline {
+  %t = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 12, i32 5, i32 6, i32 7>
+  ret <8 x i32> %t
+}
+
+
+; CHECK: blend_test3
+; CHECK: vblendps
+; CHECK: ret
+define <8 x float> @blend_test3(<8 x float> %a, <8 x float> %b) nounwind alwaysinline {
+  %t = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 12, i32 5, i32 6, i32 7>
+  ret <8 x float> %t
+}
+
+; CHECK: blend_test4
+; CHECK: vblendpd
+; CHECK: ret
+define <4 x i64> @blend_test4(<4 x i64> %a, <4 x i64> %b) nounwind alwaysinline {
+  %t = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
+  ret <4 x i64> %t
+}
+
 ; CHECK: vpshufhw $27, %ymm
 define <16 x i16> @vpshufhw(<16 x i16> %src1) nounwind uwtable readnone ssp {
 entry:
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll
index 11f811f8cf..34445428ea 100644
--- a/test/CodeGen/X86/blend-msb.ll
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -28,7 +28,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
 ; reduce the mask in this case.
 ;CHECK: vsel_8xi16
 ;CHECK: psllw
-;CHECK: psraw
+;CHECK-NOT: psraw
 ;CHECK: pblendvb
 ;CHECK: ret
 define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
diff --git a/test/CodeGen/X86/bt.ll b/test/CodeGen/X86/bt.ll
index ec447e5e9c..39a784dec3 100644
--- a/test/CodeGen/X86/bt.ll
+++ b/test/CodeGen/X86/bt.ll
@@ -1,6 +1,4 @@
-; RUN: llc < %s -march=x86 | grep btl | count 28
-; RUN: llc < %s -march=x86 -mcpu=pentium4 | grep btl | not grep esp
-; RUN: llc < %s -march=x86 -mcpu=penryn   | grep btl | not grep esp
+; RUN: llc < %s -mtriple=i386-apple-macosx -mcpu=penryn | FileCheck %s
 ; PR3253
 
 ; The register+memory form of the BT instruction should be usable on
@@ -21,6 +19,9 @@
 
 define void @test2(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: test2
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
 	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
@@ -36,6 +37,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @test2b(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: test2b
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
 	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
@@ -51,6 +55,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @atest2(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: atest2
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
 	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
@@ -66,6 +73,8 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @atest2b(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: atest2b
+; CHECK: btl %eax, %ecx
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
 	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
@@ -81,6 +90,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @test3(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: test3
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
 	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
@@ -96,6 +108,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @test3b(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: test3b
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
 	%tmp4 = icmp eq i32 %tmp3, 0		; <i1> [#uses=1]
@@ -111,6 +126,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @testne2(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: testne2
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
 	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
@@ -126,6 +144,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @testne2b(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: testne2b
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
 	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
@@ -141,6 +162,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @atestne2(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: atestne2
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
 	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
@@ -156,6 +180,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @atestne2b(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: atestne2b
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
 	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
@@ -171,6 +198,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @testne3(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: testne3
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
 	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
@@ -186,6 +216,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @testne3b(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: testne3b
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
 	%tmp4 = icmp ne i32 %tmp3, 0		; <i1> [#uses=1]
@@ -201,6 +234,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @query2(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: query2
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
 	%tmp4 = icmp eq i32 %tmp3, 1		; <i1> [#uses=1]
@@ -216,6 +252,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @query2b(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: query2b
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
 	%tmp4 = icmp eq i32 %tmp3, 1		; <i1> [#uses=1]
@@ -231,6 +270,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @aquery2(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: aquery2
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
 	%tmp4 = icmp eq i32 %tmp3, 1		; <i1> [#uses=1]
@@ -246,6 +288,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @aquery2b(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: aquery2b
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
 	%tmp4 = icmp eq i32 %tmp3, 1		; <i1> [#uses=1]
@@ -261,6 +306,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @query3(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: query3
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
 	%tmp4 = icmp eq i32 %tmp3, %tmp29		; <i1> [#uses=1]
@@ -276,6 +324,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @query3b(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: query3b
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
 	%tmp4 = icmp eq i32 %tmp3, %tmp29		; <i1> [#uses=1]
@@ -291,6 +342,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @query3x(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: query3x
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
 	%tmp4 = icmp eq i32 %tmp29, %tmp3		; <i1> [#uses=1]
@@ -306,6 +360,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @query3bx(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: query3bx
+; CHECK: btl %eax, %ecx
+; CHECK: jae
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
 	%tmp4 = icmp eq i32 %tmp29, %tmp3		; <i1> [#uses=1]
@@ -321,6 +378,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @queryne2(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: queryne2
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
 	%tmp4 = icmp ne i32 %tmp3, 1		; <i1> [#uses=1]
@@ -336,6 +396,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @queryne2b(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: queryne2b
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = lshr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
 	%tmp4 = icmp ne i32 %tmp3, 1		; <i1> [#uses=1]
@@ -351,6 +414,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @aqueryne2(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: aqueryne2
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, 1		; <i32> [#uses=1]
 	%tmp4 = icmp ne i32 %tmp3, 1		; <i1> [#uses=1]
@@ -366,6 +432,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @aqueryne2b(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: aqueryne2b
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = ashr i32 %x, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 1, %tmp29
 	%tmp4 = icmp ne i32 %tmp3, 1		; <i1> [#uses=1]
@@ -381,6 +450,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @queryne3(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: queryne3
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
 	%tmp4 = icmp ne i32 %tmp3, %tmp29		; <i1> [#uses=1]
@@ -396,6 +468,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @queryne3b(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: queryne3b
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
 	%tmp4 = icmp ne i32 %tmp3, %tmp29		; <i1> [#uses=1]
@@ -411,6 +486,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @queryne3x(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: queryne3x
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %tmp29, %x		; <i32> [#uses=1]
 	%tmp4 = icmp ne i32 %tmp29, %tmp3		; <i1> [#uses=1]
@@ -426,6 +504,9 @@ UnifiedReturnBlock:		; preds = %entry
 
 define void @queryne3bx(i32 %x, i32 %n) nounwind {
 entry:
+; CHECK: queryne3bx
+; CHECK: btl %eax, %ecx
+; CHECK: jb
 	%tmp29 = shl i32 1, %n		; <i32> [#uses=1]
 	%tmp3 = and i32 %x, %tmp29
 	%tmp4 = icmp ne i32 %tmp29, %tmp3		; <i1> [#uses=1]
@@ -440,3 +521,16 @@ UnifiedReturnBlock:		; preds = %entry
 }
 
 declare void @foo()
+
+; rdar://12755626
+define zeroext i1 @invert(i32 %flags, i32 %flag) nounwind {
+; CHECK: invert
+; CHECK: btl %eax, %ecx
+; CHECK: setae
+entry:
+  %neg = xor i32 %flags, -1
+  %shl = shl i32 1, %flag
+  %and = and i32 %shl, %neg
+  %tobool = icmp ne i32 %and, 0
+  ret i1 %tobool
+}
diff --git a/test/CodeGen/X86/dbg-at-specficiation.ll b/test/CodeGen/X86/dbg-at-specficiation.ll
index aa5e6efede..48b8202bd5 100644
--- a/test/CodeGen/X86/dbg-at-specficiation.ll
+++ b/test/CodeGen/X86/dbg-at-specficiation.ll
@@ -17,4 +17,4 @@
 !7 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !8, metadata !9, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !8 = metadata !{i32 720932, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
 !9 = metadata !{metadata !10}
-!10 = metadata !{i32 720929, i64 0, i64 9}        ; [ DW_TAG_subrange_type ]
+!10 = metadata !{i32 720929, i64 0, i64 10}        ; [ DW_TAG_subrange_type ]
diff --git a/test/CodeGen/X86/dbg-declare.ll b/test/CodeGen/X86/dbg-declare.ll
index 5d4cedc5c4..b73e310cc5 100644
--- a/test/CodeGen/X86/dbg-declare.ll
+++ b/test/CodeGen/X86/dbg-declare.ll
@@ -51,7 +51,7 @@ declare void @llvm.stackrestore(i8*) nounwind
 !19 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 8, i32 0, i32 0, metadata !20, metadata !21, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !20 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !21 = metadata !{metadata !22}
-!22 = metadata !{i32 786465, i64 1, i64 0}        ; [ DW_TAG_subrange_type ]
+!22 = metadata !{i32 786465, i64 0, i64 -1}        ; [ DW_TAG_subrange_type ]
 !23 = metadata !{i32 7, i32 8, metadata !17, null}
 !24 = metadata !{i32 9, i32 1, metadata !17, null}
 !25 = metadata !{i32 8, i32 3, metadata !17, null}
diff --git a/test/CodeGen/X86/dbg-subrange.ll b/test/CodeGen/X86/dbg-subrange.ll
index 788910c7fe..0efb50e9a9 100644
--- a/test/CodeGen/X86/dbg-subrange.ll
+++ b/test/CodeGen/X86/dbg-subrange.ll
@@ -31,7 +31,7 @@ entry:
 !14 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 34359738368, i64 8, i32 0, i32 0, metadata !15, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !15 = metadata !{i32 720932, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !16 = metadata !{metadata !17}
-!17 = metadata !{i32 720929, i64 0, i64 4294967295} ; [ DW_TAG_subrange_type ]
+!17 = metadata !{i32 720929, i64 0, i64 4294967296} ; [ DW_TAG_subrange_type ]
 !18 = metadata !{i32 5, i32 3, metadata !19, null}
 !19 = metadata !{i32 720907, metadata !5, i32 4, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
 !20 = metadata !{i32 6, i32 1, metadata !19, null}
diff --git a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
index 2fe1ecd40e..7a1a9ae461 100644
--- a/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
+++ b/test/CodeGen/X86/fma4-intrinsics-x86_64.ll
@@ -63,6 +63,16 @@ define < 4 x float > @test_x86_fma_vfmadd_ps_load2(< 4 x float > %a0, < 4 x floa
 }
 declare < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
+; To test execution dependency
+define < 4 x float > @test_x86_fma_vfmadd_ps_load3(< 4 x float >* %a0, < 4 x float >* %a1, < 4 x float > %a2) {
+  ; CHECK: vmovaps
+  ; CHECK: vfmaddps %{{.*}}, (%{{.*}})
+  %x = load <4 x float>* %a0
+  %y = load <4 x float>* %a1
+  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ps(< 4 x float > %x, < 4 x float > %y, < 4 x float > %a2) ; <i64> [#uses=1]
+  ret < 4 x float > %res
+}
+
 define < 2 x double > @test_x86_fma_vfmadd_pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) {
   ; CHECK: vfmaddpd
   %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %a2) ; <i64> [#uses=1]
@@ -82,6 +92,16 @@ define < 2 x double > @test_x86_fma_vfmadd_pd_load2(< 2 x double > %a0, < 2 x do
 }
 declare < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 
+; To test execution dependency
+define < 2 x double > @test_x86_fma_vfmadd_pd_load3(< 2 x double >* %a0, < 2 x double >* %a1, < 2 x double > %a2) {
+  ; CHECK: vmovapd
+  ; CHECK: vfmaddpd %{{.*}}, (%{{.*}})
+  %x = load <2 x double>* %a0
+  %y = load <2 x double>* %a1
+  %res = call < 2 x double > @llvm.x86.fma.vfmadd.pd(< 2 x double > %x, < 2 x double > %y, < 2 x double > %a2) ; <i64> [#uses=1]
+  ret < 2 x double > %res
+}
+
 define < 8 x float > @test_x86_fma_vfmadd_ps_256(< 8 x float > %a0, < 8 x float > %a1, < 8 x float > %a2) {
   ; CHECK: vfmaddps
   ; CHECK: ymm
diff --git a/test/CodeGen/X86/fold-load-vec.ll b/test/CodeGen/X86/fold-load-vec.ll
new file mode 100644
index 0000000000..c1756d5e2e
--- /dev/null
+++ b/test/CodeGen/X86/fold-load-vec.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+sse41 | FileCheck %s
+
+; rdar://12721174
+; We should not fold movss into pshufd since pshufd expects m128 while movss
+; loads from m32.
+define void @sample_test(<4 x float>* %source, <2 x float>* %dest) nounwind {
+; CHECK: sample_test
+; CHECK: movss
+; CHECK: pshufd
+entry:
+  %source.addr = alloca <4 x float>*, align 8
+  %dest.addr = alloca <2 x float>*, align 8
+  %tmp = alloca <2 x float>, align 8
+  store <4 x float>* %source, <4 x float>** %source.addr, align 8
+  store <2 x float>* %dest, <2 x float>** %dest.addr, align 8
+  store <2 x float> zeroinitializer, <2 x float>* %tmp, align 8
+  %0 = load <4 x float>** %source.addr, align 8
+  %arrayidx = getelementptr inbounds <4 x float>* %0, i64 0
+  %1 = load <4 x float>* %arrayidx, align 16
+  %2 = extractelement <4 x float> %1, i32 0
+  %3 = load <2 x float>* %tmp, align 8
+  %4 = insertelement <2 x float> %3, float %2, i32 1
+  store <2 x float> %4, <2 x float>* %tmp, align 8
+  %5 = load <2 x float>* %tmp, align 8
+  %6 = load <2 x float>** %dest.addr, align 8
+  %arrayidx1 = getelementptr inbounds <2 x float>* %6, i64 0
+  store <2 x float> %5, <2 x float>* %arrayidx1, align 8
+  %7 = load <2 x float>** %dest.addr, align 8
+  %arrayidx2 = getelementptr inbounds <2 x float>* %7, i64 0
+  %8 = load <2 x float>* %arrayidx2, align 8
+  %vecext = extractelement <2 x float> %8, i32 0
+  %9 = load <2 x float>** %dest.addr, align 8
+  %arrayidx3 = getelementptr inbounds <2 x float>* %9, i64 0
+  %10 = load <2 x float>* %arrayidx3, align 8
+  %vecext4 = extractelement <2 x float> %10, i32 1
+  call void @ext(float %vecext, float %vecext4)
+  ret void
+}
+declare void @ext(float, float)
diff --git a/test/CodeGen/X86/fold-pcmpeqd-2.ll b/test/CodeGen/X86/fold-pcmpeqd-2.ll
index 9cf4607cf5..2bde76efd2 100644
--- a/test/CodeGen/X86/fold-pcmpeqd-2.ll
+++ b/test/CodeGen/X86/fold-pcmpeqd-2.ll
@@ -43,21 +43,21 @@ forbody:		; preds = %forcond
 	%mul171.i = fmul <4 x float> %add167.i, %sub140.i		; <<4 x float>> [#uses=1]
 	%add172.i = fadd <4 x float> %mul171.i, < float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000, float 0x3FF0000A40000000 >		; <<4 x float>> [#uses=1]
 	%bitcast176.i = bitcast <4 x float> %add172.i to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andnps178.i = and <4 x i32> %bitcast176.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%andnps178.i = add <4 x i32> %bitcast176.i, <i32 1, i32 1, i32 1, i32 1>		; <<4 x i32>> [#uses=1]
 	%bitcast179.i = bitcast <4 x i32> %andnps178.i to <4 x float>		; <<4 x float>> [#uses=1]
 	%mul186.i = fmul <4 x float> %bitcast179.i, zeroinitializer		; <<4 x float>> [#uses=1]
 	%bitcast190.i = bitcast <4 x float> %mul186.i to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%andnps192.i = and <4 x i32> %bitcast190.i, zeroinitializer		; <<4 x i32>> [#uses=1]
+	%andnps192.i = add <4 x i32> %bitcast190.i, <i32 1, i32 1, i32 1, i32 1>		; <<4 x i32>> [#uses=1]
 	%xorps.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
-	%orps203.i = or <4 x i32> %andnps192.i, %xorps.i		; <<4 x i32>> [#uses=1]
+	%orps203.i = add <4 x i32> %andnps192.i, %xorps.i		; <<4 x i32>> [#uses=1]
 	%bitcast204.i = bitcast <4 x i32> %orps203.i to <4 x float>		; <<4 x float>> [#uses=1]
 	%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer		; <<4 x float>> [#uses=2]
 	%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer		; <<4 x float>> [#uses=1]
 	%cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind		; <<4 x float>> [#uses=1]
 	%bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32>		; <<4 x i32>> [#uses=2]
-	%andps.i14 = and <4 x i32> zeroinitializer, %bitcast6.i13		; <<4 x i32>> [#uses=1]
+	%andps.i14 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %bitcast6.i13		; <<4 x i32>> [#uses=1]
 	%not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 >		; <<4 x i32>> [#uses=1]
-	%andnps.i17 = and <4 x i32> zeroinitializer, %not.i16		; <<4 x i32>> [#uses=1]
+	%andnps.i17 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %not.i16		; <<4 x i32>> [#uses=1]
 	%orps.i18 = or <4 x i32> %andnps.i17, %andps.i14		; <<4 x i32>> [#uses=1]
 	%bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float>		; <<4 x float>> [#uses=1]
 	%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind		; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 7a2bbc4ef0..dcc8f0d268 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -10,18 +10,18 @@
 define void @t1(i32 %argc, i8** %argv) nounwind  {
 entry:
 ; SSE2: t1:
+; SSE2: movsd _.str+16, %xmm0
+; SSE2: movsd %xmm0, 16(%esp)
 ; SSE2: movaps _.str, %xmm0
 ; SSE2: movaps %xmm0
-; SSE2: movb $0
-; SSE2: movl $0
-; SSE2: movl $0
+; SSE2: movb $0, 24(%esp)
 
 ; SSE1: t1:
+; SSE1: fldl _.str+16
+; SSE1: fstpl 16(%esp)
 ; SSE1: movaps _.str, %xmm0
 ; SSE1: movaps %xmm0
-; SSE1: movb $0
-; SSE1: movl $0
-; SSE1: movl $0
+; SSE1: movb $0, 24(%esp)
 
 ; NOSSE: t1:
 ; NOSSE: movb $0
diff --git a/test/CodeGen/X86/misched-matrix.ll b/test/CodeGen/X86/misched-matrix.ll
new file mode 100644
index 0000000000..f5566e5e5d
--- /dev/null
+++ b/test/CodeGen/X86/misched-matrix.ll
@@ -0,0 +1,195 @@
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
+; RUN:          -misched-topdown -verify-machineinstrs \
+; RUN:     | FileCheck %s -check-prefix=TOPDOWN
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
+; RUN:          -misched=ilpmin -verify-machineinstrs \
+; RUN:     | FileCheck %s -check-prefix=ILPMIN
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -pre-RA-sched=source -enable-misched \
+; RUN:          -misched=ilpmax -verify-machineinstrs \
+; RUN:     | FileCheck %s -check-prefix=ILPMAX
+;
+; Verify that the MI scheduler minimizes register pressure for a
+; uniform set of bottom-up subtrees (unrolled matrix multiply).
+;
+; For current top-down heuristics, ensure that some folded imulls have
+; been reordered with the stores. This tests the scheduler's cheap
+; alias analysis ability (that doesn't require any AliasAnalysis pass).
+;
+; TOPDOWN: %for.body
+; TOPDOWN: movl %{{.*}}, (
+; TOPDOWN: imull {{[0-9]*}}(
+; TOPDOWN: movl %{{.*}}, 4(
+; TOPDOWN: imull {{[0-9]*}}(
+; TOPDOWN: movl %{{.*}}, 8(
+; TOPDOWN: movl %{{.*}}, 12(
+; TOPDOWN: %for.end
+;
+; For -misched=ilpmin, verify that each expression subtree is
+; scheduled independently, and that the imull/adds are interleaved.
+;
+; ILPMIN: %for.body
+; ILPMIN: movl %{{.*}}, (
+; ILPMIN: imull
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: movl %{{.*}}, 4(
+; ILPMIN: imull
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: movl %{{.*}}, 8(
+; ILPMIN: imull
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: imull
+; ILPMIN: addl
+; ILPMIN: movl %{{.*}}, 12(
+; ILPMIN: %for.end
+;
+; For -misched=ilpmax, verify that each expression subtree is
+; scheduled independently, and that the imull/adds are clustered.
+;
+; ILPMAX: %for.body
+; ILPMAX: movl %{{.*}}, (
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: addl
+; ILPMAX: addl
+; ILPMAX: addl
+; ILPMAX: movl %{{.*}}, 4(
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: addl
+; ILPMAX: addl
+; ILPMAX: addl
+; ILPMAX: movl %{{.*}}, 8(
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: imull
+; ILPMAX: addl
+; ILPMAX: addl
+; ILPMAX: addl
+; ILPMAX: movl %{{.*}}, 12(
+; ILPMAX: %for.end
+
+define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2,
+[4 x i32]* noalias nocapture %m3) nounwind uwtable ssp {
+entry:
+  br label %for.body
+
+for.body:                              ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx8 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 0
+  %tmp = load i32* %arrayidx8, align 4, !tbaa !0
+  %arrayidx12 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 0
+  %tmp1 = load i32* %arrayidx12, align 4, !tbaa !0
+  %arrayidx8.1 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 1
+  %tmp2 = load i32* %arrayidx8.1, align 4, !tbaa !0
+  %arrayidx12.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 0
+  %tmp3 = load i32* %arrayidx12.1, align 4, !tbaa !0
+  %arrayidx8.2 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 2
+  %tmp4 = load i32* %arrayidx8.2, align 4, !tbaa !0
+  %arrayidx12.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 0
+  %tmp5 = load i32* %arrayidx12.2, align 4, !tbaa !0
+  %arrayidx8.3 = getelementptr inbounds [4 x i32]* %m1, i64 %indvars.iv, i64 3
+  %tmp6 = load i32* %arrayidx8.3, align 4, !tbaa !0
+  %arrayidx12.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 0
+  %tmp8 = load i32* %arrayidx8, align 4, !tbaa !0
+  %arrayidx12.137 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 1
+  %tmp9 = load i32* %arrayidx12.137, align 4, !tbaa !0
+  %tmp10 = load i32* %arrayidx8.1, align 4, !tbaa !0
+  %arrayidx12.1.1 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 1
+  %tmp11 = load i32* %arrayidx12.1.1, align 4, !tbaa !0
+  %tmp12 = load i32* %arrayidx8.2, align 4, !tbaa !0
+  %arrayidx12.2.1 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 1
+  %tmp13 = load i32* %arrayidx12.2.1, align 4, !tbaa !0
+  %tmp14 = load i32* %arrayidx8.3, align 4, !tbaa !0
+  %arrayidx12.3.1 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 1
+  %tmp15 = load i32* %arrayidx12.3.1, align 4, !tbaa !0
+  %tmp16 = load i32* %arrayidx8, align 4, !tbaa !0
+  %arrayidx12.239 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 2
+  %tmp17 = load i32* %arrayidx12.239, align 4, !tbaa !0
+  %tmp18 = load i32* %arrayidx8.1, align 4, !tbaa !0
+  %arrayidx12.1.2 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 2
+  %tmp19 = load i32* %arrayidx12.1.2, align 4, !tbaa !0
+  %tmp20 = load i32* %arrayidx8.2, align 4, !tbaa !0
+  %arrayidx12.2.2 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 2
+  %tmp21 = load i32* %arrayidx12.2.2, align 4, !tbaa !0
+  %tmp22 = load i32* %arrayidx8.3, align 4, !tbaa !0
+  %arrayidx12.3.2 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 2
+  %tmp23 = load i32* %arrayidx12.3.2, align 4, !tbaa !0
+  %tmp24 = load i32* %arrayidx8, align 4, !tbaa !0
+  %arrayidx12.341 = getelementptr inbounds [4 x i32]* %m2, i64 0, i64 3
+  %tmp25 = load i32* %arrayidx12.341, align 4, !tbaa !0
+  %tmp26 = load i32* %arrayidx8.1, align 4, !tbaa !0
+  %arrayidx12.1.3 = getelementptr inbounds [4 x i32]* %m2, i64 1, i64 3
+  %tmp27 = load i32* %arrayidx12.1.3, align 4, !tbaa !0
+  %tmp28 = load i32* %arrayidx8.2, align 4, !tbaa !0
+  %arrayidx12.2.3 = getelementptr inbounds [4 x i32]* %m2, i64 2, i64 3
+  %tmp29 = load i32* %arrayidx12.2.3, align 4, !tbaa !0
+  %tmp30 = load i32* %arrayidx8.3, align 4, !tbaa !0
+  %arrayidx12.3.3 = getelementptr inbounds [4 x i32]* %m2, i64 3, i64 3
+  %tmp31 = load i32* %arrayidx12.3.3, align 4, !tbaa !0
+  %tmp7 = load i32* %arrayidx12.3, align 4, !tbaa !0
+  %mul = mul nsw i32 %tmp1, %tmp
+  %mul.1 = mul nsw i32 %tmp3, %tmp2
+  %mul.2 = mul nsw i32 %tmp5, %tmp4
+  %mul.3 = mul nsw i32 %tmp7, %tmp6
+  %mul.138 = mul nsw i32 %tmp9, %tmp8
+  %mul.1.1 = mul nsw i32 %tmp11, %tmp10
+  %mul.2.1 = mul nsw i32 %tmp13, %tmp12
+  %mul.3.1 = mul nsw i32 %tmp15, %tmp14
+  %mul.240 = mul nsw i32 %tmp17, %tmp16
+  %mul.1.2 = mul nsw i32 %tmp19, %tmp18
+  %mul.2.2 = mul nsw i32 %tmp21, %tmp20
+  %mul.3.2 = mul nsw i32 %tmp23, %tmp22
+  %mul.342 = mul nsw i32 %tmp25, %tmp24
+  %mul.1.3 = mul nsw i32 %tmp27, %tmp26
+  %mul.2.3 = mul nsw i32 %tmp29, %tmp28
+  %mul.3.3 = mul nsw i32 %tmp31, %tmp30
+  %add.1 = add nsw i32 %mul.1, %mul
+  %add.2 = add nsw i32 %mul.2, %add.1
+  %add.3 = add nsw i32 %mul.3, %add.2
+  %add.1.1 = add nsw i32 %mul.1.1, %mul.138
+  %add.2.1 = add nsw i32 %mul.2.1, %add.1.1
+  %add.3.1 = add nsw i32 %mul.3.1, %add.2.1
+  %add.1.2 = add nsw i32 %mul.1.2, %mul.240
+  %add.2.2 = add nsw i32 %mul.2.2, %add.1.2
+  %add.3.2 = add nsw i32 %mul.3.2, %add.2.2
+  %add.1.3 = add nsw i32 %mul.1.3, %mul.342
+  %add.2.3 = add nsw i32 %mul.2.3, %add.1.3
+  %add.3.3 = add nsw i32 %mul.3.3, %add.2.3
+  %arrayidx16 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 0
+  store i32 %add.3, i32* %arrayidx16, align 4, !tbaa !0
+  %arrayidx16.1 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 1
+  store i32 %add.3.1, i32* %arrayidx16.1, align 4, !tbaa !0
+  %arrayidx16.2 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 2
+  store i32 %add.3.2, i32* %arrayidx16.2, align 4, !tbaa !0
+  %arrayidx16.3 = getelementptr inbounds [4 x i32]* %m3, i64 %indvars.iv, i64 3
+  store i32 %add.3.3, i32* %arrayidx16.3, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                        ; preds = %for.body
+  ret void
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/X86/misched-new.ll b/test/CodeGen/X86/misched-new.ll
index cec04b534f..89e45b7cfc 100644
--- a/test/CodeGen/X86/misched-new.ll
+++ b/test/CodeGen/X86/misched-new.ll
@@ -1,6 +1,9 @@
 ; RUN: llc < %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
 ; RUN:          -misched=shuffle -misched-bottomup -verify-machineinstrs \
 ; RUN:     | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mcpu=core2 -x86-early-ifcvt -enable-misched \
+; RUN:          -misched=shuffle -misched-topdown -verify-machineinstrs \
+; RUN:     | FileCheck %s --check-prefix TOPDOWN
 ; REQUIRES: asserts
 ;
 ; Interesting MachineScheduler cases.
@@ -51,3 +54,56 @@ if.end:                                           ; preds = %if.then, %entry
 }
 
 declare void @bar(i32,i32)
+
+; Test that the DAG builder can handle an undef vreg on ExitSU.
+; CHECK: hasundef
+; CHECK: call
+
+%t0 = type { i32, i32, i8 }
+%t6 = type { i32 (...)**, %t7* }
+%t7 = type { i32 (...)** }
+
+define void @hasundef() unnamed_addr uwtable ssp align 2 {
+  %1 = alloca %t0, align 8
+  br i1 undef, label %3, label %2
+
+; <label>:2                                       ; preds = %0
+  unreachable
+
+; <label>:3                                       ; preds = %0
+  br i1 undef, label %4, label %5
+
+; <label>:4                                       ; preds = %3
+  call void undef(%t6* undef, %t0* %1)
+  unreachable
+
+; <label>:5                                       ; preds = %3
+  ret void
+}
+
+; Test top-down subregister liveness tracking. Self-verification
+; catches any pressure set underflow.
+; rdar://12797931.
+;
+; TOPDOWN: @testSubregTracking
+; TOPDOWN: divb
+; TOPDOWN: movzbl %al
+; TOPDOWN: ret
+define void @testSubregTracking() nounwind uwtable ssp align 2 {
+  %tmp = load i8* undef, align 1
+  %tmp6 = sub i8 0, %tmp
+  %tmp7 = load i8* undef, align 1
+  %tmp8 = udiv i8 %tmp6, %tmp7
+  %tmp9 = zext i8 %tmp8 to i64
+  %tmp10 = load i8* undef, align 1
+  %tmp11 = zext i8 %tmp10 to i64
+  %tmp12 = mul i64 %tmp11, %tmp9
+  %tmp13 = urem i8 %tmp6, %tmp7
+  %tmp14 = zext i8 %tmp13 to i32
+  %tmp15 = add nsw i32 %tmp14, 0
+  %tmp16 = add i32 %tmp15, 0
+  store i32 %tmp16, i32* undef, align 4
+  %tmp17 = add i64 0, %tmp12
+  store i64 %tmp17, i64* undef, align 8
+  ret void
+}
diff --git a/test/CodeGen/X86/rdrand.ll b/test/CodeGen/X86/rdrand.ll
index e2224a6196..98f4077763 100644
--- a/test/CodeGen/X86/rdrand.ll
+++ b/test/CodeGen/X86/rdrand.ll
@@ -39,7 +39,7 @@ define i32 @_rdrand64_step(i64* %random_val) {
   %isvalid = extractvalue {i64, i32} %call, 1
   ret i32 %isvalid
 ; CHECK: _rdrand64_step:
-; CHECK: rdrandq	%r[[T1:[[a-z]+]]
+; CHECK: rdrandq	%r[[T1:[a-z]+]]
 ; CHECK: movq	%r[[T1]], (%r[[A0]])
 ; CHECK: movl	$1, %eax
 ; CHECK: cmovael	%e[[T1]], %eax
diff --git a/test/CodeGen/X86/sext-load.ll b/test/CodeGen/X86/sext-load.ll
index c9b39d3a48..58c93229a2 100644
--- a/test/CodeGen/X86/sext-load.ll
+++ b/test/CodeGen/X86/sext-load.ll
@@ -1,9 +1,30 @@
-; RUN: llc < %s -march=x86 | grep movsbl
+; RUN: llc < %s -march=x86 | FileCheck %s
 
-define i32 @foo(i32 %X) nounwind  {
+; When doing sign extension, use the sext-load lowering to take advantage of
+; x86's sign extension during loads.
+;
+; CHECK: test1:
+; CHECK:      movsbl {{.*}}, %eax
+; CHECK-NEXT: ret
+define i32 @test1(i32 %X) nounwind  {
 entry:
 	%tmp12 = trunc i32 %X to i8		; <i8> [#uses=1]
 	%tmp123 = sext i8 %tmp12 to i32		; <i32> [#uses=1]
 	ret i32 %tmp123
 }
 
+; When using a sextload representation, ensure that the sign extension is
+; preserved even when removing shifted-out low bits.
+;
+; CHECK: test2:
+; CHECK:      movswl {{.*}}, %eax
+; CHECK-NEXT: ret
+define i32 @test2({i16, [6 x i8]}* %this) {
+entry:
+  %b48 = getelementptr inbounds { i16, [6 x i8] }* %this, i32 0, i32 1
+  %cast = bitcast [6 x i8]* %b48 to i48*
+  %bf.load = load i48* %cast, align 2
+  %bf.ashr = ashr i48 %bf.load, 32
+  %bf.cast = trunc i48 %bf.ashr to i32
+  ret i32 %bf.cast
+}
diff --git a/test/CodeGen/X86/sse2-blend.ll b/test/CodeGen/X86/sse2-blend.ll
index 2f4317bf29..67ce1be135 100644
--- a/test/CodeGen/X86/sse2-blend.ll
+++ b/test/CodeGen/X86/sse2-blend.ll
@@ -28,33 +28,31 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
 
 ; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_i64
-; CHECK: xorps
-; CHECK: andps
 ; CHECK: andnps
+; CHECK: andps
 ; CHECK: orps
 ; CHECK: ret
 
-define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
-  %A = load <4 x i64>* %v1
-  %B = load <4 x i64>* %v2
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %A, <4 x i64> %B
-  store <4 x i64 > %vsel, <4 x i64>* %v1
+define void@vsel_i64(<2 x i64>* %v1, <2 x i64>* %v2) {
+  %A = load <2 x i64>* %v1
+  %B = load <2 x i64>* %v2
+  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %A, <2 x i64> %B
+  store <2 x i64 > %vsel, <2 x i64>* %v1
   ret void
 }
 
 ; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_double
-; CHECK: xorps
-; CHECK: andps
 ; CHECK: andnps
+; CHECK: andps
 ; CHECK: orps
 ; CHECK: ret
 
-define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
-  %A = load <4 x double>* %v1
-  %B = load <4 x double>* %v2
-  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %A, <4 x double> %B
-  store <4 x double > %vsel, <4 x double>* %v1
+define void@vsel_double(<2 x double>* %v1, <2 x double>* %v2) {
+  %A = load <2 x double>* %v1
+  %B = load <2 x double>* %v2
+  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %A, <2 x double> %B
+  store <2 x double > %vsel, <2 x double>* %v1
   ret void
 }
 
diff --git a/test/CodeGen/X86/tailcall-fastisel.ll b/test/CodeGen/X86/tailcall-fastisel.ll
index 7f92af4dca..842ed25439 100644
--- a/test/CodeGen/X86/tailcall-fastisel.ll
+++ b/test/CodeGen/X86/tailcall-fastisel.ll
@@ -1,12 +1,11 @@
-; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | not grep TAILCALL
-
-; Fast-isel shouldn't attempt to cope with tail calls.
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -tailcallopt -fast-isel -fast-isel-abort | FileCheck %s
 
 %0 = type { i64, i32, i8* }
 
 define fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 %arg1) nounwind {
 fail:                                             ; preds = %entry
   %tmp20 = tail call fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; <i8*> [#uses=1]
+; CHECK: jmp "_visit_array_aux<`Reference>" ## TAILCALL
   ret i8* %tmp20
 }
 
diff --git a/test/CodeGen/X86/vec_shuffle-20.ll b/test/CodeGen/X86/vec_shuffle-20.ll
index 976cd1835b..b6b8ba6f84 100644
--- a/test/CodeGen/X86/vec_shuffle-20.ll
+++ b/test/CodeGen/X86/vec_shuffle-20.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 3
+; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2
 
 define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind  {
 entry:
diff --git a/test/CodeGen/X86/vec_zero.ll b/test/CodeGen/X86/vec_zero.ll
index 682a0dfca8..c3ea0ad202 100644
--- a/test/CodeGen/X86/vec_zero.ll
+++ b/test/CodeGen/X86/vec_zero.ll
@@ -13,7 +13,7 @@ define void @foo(<4 x float>* %P) {
 ; CHECK: pxor
 define void @bar(<4 x i32>* %P) {
         %T = load <4 x i32>* %P         ; <<4 x i32>> [#uses=1]
-        %S = add <4 x i32> zeroinitializer, %T          ; <<4 x i32>> [#uses=1]
+        %S = sub <4 x i32> zeroinitializer, %T          ; <<4 x i32>> [#uses=1]
         store <4 x i32> %S, <4 x i32>* %P
         ret void
 }
diff --git a/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll b/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
index 9bb35fab4f..accdf8a86c 100644
--- a/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
+++ b/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
@@ -25,6 +25,6 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !14 = metadata !{i32 524289, metadata !4, metadata !"", metadata !4, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !15, metadata !16, i32 0, null} ; [ DW_TAG_array_type ]
 !15 = metadata !{i32 524324, metadata !4, metadata !"char", metadata !4, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
 !16 = metadata !{metadata !17}
-!17 = metadata !{i32 524321, i64 0, i64 0}        ; [ DW_TAG_subrange_type ]
+!17 = metadata !{i32 524321, i64 0, i64 1}        ; [ DW_TAG_subrange_type ]
 !18 = metadata !{metadata !"llvm.mdnode.fwdref.19"}
 !19 = metadata !{metadata !"llvm.mdnode.fwdref.23"}
diff --git a/test/DebugInfo/2010-05-03-OriginDIE.ll b/test/DebugInfo/2010-05-03-OriginDIE.ll
index 94bddc092f..1ade045046 100644
--- a/test/DebugInfo/2010-05-03-OriginDIE.ll
+++ b/test/DebugInfo/2010-05-03-OriginDIE.ll
@@ -81,6 +81,6 @@ declare void @uuid_LtoB(i8*, i8*)
 !30 = metadata !{i32 524310, metadata !3, metadata !"uint32_t", metadata !12, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !31} ; [ DW_TAG_typedef ]
 !31 = metadata !{i32 524324, metadata !3, metadata !"unsigned int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
 !32 = metadata !{metadata !33}
-!33 = metadata !{i32 524321, i64 0, i64 1}        ; [ DW_TAG_subrange_type ]
+!33 = metadata !{i32 524321, i64 0, i64 2}        ; [ DW_TAG_subrange_type ]
 !34 = metadata !{i32 524544, metadata !24, metadata !"addr", metadata !10, i32 96, metadata !35} ; [ DW_TAG_auto_variable ]
 !35 = metadata !{i32 524303, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !11} ; [ DW_TAG_pointer_type ]
diff --git a/test/DebugInfo/X86/empty-and-one-elem-array.ll b/test/DebugInfo/X86/empty-and-one-elem-array.ll
new file mode 100644
index 0000000000..b9224b1fde
--- /dev/null
+++ b/test/DebugInfo/X86/empty-and-one-elem-array.ll
@@ -0,0 +1,93 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -O0 -filetype=obj -o %t < %s
+; RUN: llvm-dwarfdump %t | FileCheck %s
+; <rdar://problem/12566646>
+
+%struct.foo = type { i32, [1 x i32] }
+%struct.bar = type { i32, [0 x i32] }
+
+define i32 @func() nounwind uwtable ssp {
+entry:
+  %my_foo = alloca %struct.foo, align 4
+  %my_bar = alloca %struct.bar, align 4
+  call void @llvm.dbg.declare(metadata !{%struct.foo* %my_foo}, metadata !10), !dbg !19
+  call void @llvm.dbg.declare(metadata !{%struct.bar* %my_bar}, metadata !20), !dbg !28
+  %a = getelementptr inbounds %struct.foo* %my_foo, i32 0, i32 0, !dbg !29
+  store i32 3, i32* %a, align 4, !dbg !29
+  %a1 = getelementptr inbounds %struct.bar* %my_bar, i32 0, i32 0, !dbg !30
+  store i32 5, i32* %a1, align 4, !dbg !30
+  %a2 = getelementptr inbounds %struct.foo* %my_foo, i32 0, i32 0, !dbg !31
+  %0 = load i32* %a2, align 4, !dbg !31
+  %a3 = getelementptr inbounds %struct.bar* %my_bar, i32 0, i32 0, !dbg !31
+  %1 = load i32* %a3, align 4, !dbg !31
+  %add = add nsw i32 %0, %1, !dbg !31
+  ret i32 %add, !dbg !31
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+; An empty array should not have an AT_upper_bound attribute. But an array of 1
+; should.
+
+; CHECK:      0x00000074:   DW_TAG_base_type [5]  
+; CHECK-NEXT: 0x00000075:     DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000043] = "int")
+; CHECK-NEXT: 0x00000079:     DW_AT_encoding [DW_FORM_data1]   (0x05)
+; CHECK-NEXT: 0x0000007a:     DW_AT_byte_size [DW_FORM_data1]  (0x04)
+
+; int[1]:
+; CHECK:      0x0000007e:   DW_TAG_array_type [7] *
+; CHECK-NEXT: 0x0000007f:     DW_AT_type [DW_FORM_ref4]    (cu + 0x0074 => {0x00000074})
+; CHECK:      0x00000083:     DW_TAG_subrange_type [8]  
+; CHECK-NEXT: 0x00000084:       DW_AT_type [DW_FORM_ref4]  (cu + 0x007b => {0x0000007b})
+; CHECK-NEXT: 0x00000088:       DW_AT_upper_bound [DW_FORM_data1]  (0x00)
+
+; int foo::b[1]:
+; CHECK:      0x000000a1:     DW_TAG_member [10]  
+; CHECK-NEXT: 0x000000a2:       DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000050] = "b")
+; CHECK-NEXT: 0x000000a6:       DW_AT_type [DW_FORM_ref4]  (cu + 0x007e => {0x0000007e})
+
+; int[0]:
+; CHECK:      0x000000b1:   DW_TAG_array_type [7] *
+; CHECK-NEXT: 0x000000b2:     DW_AT_type [DW_FORM_ref4]    (cu + 0x0074 => {0x00000074})
+; CHECK:      0x000000b6:     DW_TAG_subrange_type [11]  
+; CHECK-NEXT: 0x000000b7:       DW_AT_type [DW_FORM_ref4]  (cu + 0x007b => {0x0000007b})
+; CHECK-NOT:  DW_AT_upper_bound
+
+; int bar::b[0]:
+; CHECK:      0x000000d3:     DW_TAG_member [10]  
+; CHECK-NEXT: 0x000000d4:       DW_AT_name [DW_FORM_strp]  ( .debug_str[0x00000050] = "b")
+; CHECK-NEXT: 0x000000d8:       DW_AT_type [DW_FORM_ref4]  (cu + 0x00b1 => {0x000000b1})
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"test.c", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.3 (trunk 169136)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/test.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"func", metadata !"func", metadata !"", metadata !6, i32 11, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @func, null, null, metadata !1, i32 11} ; [ DW_TAG_subprogram ] [line 11] [def] [func]
+!6 = metadata !{i32 786473, metadata !"test.c", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 786688, metadata !11, metadata !"my_foo", metadata !6, i32 12, metadata !12, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_foo] [line 12]
+!11 = metadata !{i32 786443, metadata !5, i32 11, i32 0, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/Volumes/Sandbox/llvm/test.c]
+!12 = metadata !{i32 786451, null, metadata !"foo", metadata !6, i32 1, i64 64, i64 32, i32 0, i32 0, null, metadata !13, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [foo] [line 1, size 64, align 32, offset 0] [from ]
+!13 = metadata !{metadata !14, metadata !15}
+!14 = metadata !{i32 786445, metadata !12, metadata !"a", metadata !6, i32 2, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 2, size 32, align 32, offset 0] [from int]
+!15 = metadata !{i32 786445, metadata !12, metadata !"b", metadata !6, i32 3, i64 32, i64 32, i64 32, i32 0, metadata !16} ; [ DW_TAG_member ] [b] [line 3, size 32, align 32, offset 32] [from ]
+!16 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32, i64 32, i32 0, i32 0, metadata !9, metadata !17, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 32, align 32, offset 0] [from int]
+!17 = metadata !{metadata !18}
+!18 = metadata !{i32 786465, i64 0, i64 1} ; [ DW_TAG_subrange_type ] [0, 1]
+!19 = metadata !{i32 12, i32 0, metadata !11, null}
+!20 = metadata !{i32 786688, metadata !11, metadata !"my_bar", metadata !6, i32 13, metadata !21, i32 0, i32 0} ; [ DW_TAG_auto_variable ] [my_bar] [line 13]
+!21 = metadata !{i32 786451, null, metadata !"bar", metadata !6, i32 6, i64 32, i64 32, i32 0, i32 0, null, metadata !22, i32 0, i32 0, i32 0} ; [ DW_TAG_structure_type ] [bar] [line 6, size 32, align 32, offset 0] [from ]
+!22 = metadata !{metadata !23, metadata !24}
+!23 = metadata !{i32 786445, metadata !21, metadata !"a", metadata !6, i32 7, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_member ] [a] [line 7, size 32, align 32, offset 0] [from int]
+!24 = metadata !{i32 786445, metadata !21, metadata !"b", metadata !6, i32 8, i64 0, i64 32, i64 32, i32 0, metadata !25} ; [ DW_TAG_member ] [b] [line 8, size 0, align 32, offset 32] [from ]
+!25 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!26 = metadata !{metadata !27}
+!27 = metadata !{i32 786465, i64 0, i64 0} ; [ DW_TAG_subrange_type ] [0, 0]
+!28 = metadata !{i32 13, i32 0, metadata !11, null}
+!29 = metadata !{i32 15, i32 0, metadata !11, null}
+!30 = metadata !{i32 16, i32 0, metadata !11, null}
+!31 = metadata !{i32 17, i32 0, metadata !11, null}
diff --git a/test/DebugInfo/X86/empty-array.ll b/test/DebugInfo/X86/empty-array.ll
new file mode 100644
index 0000000000..cd968478ab
--- /dev/null
+++ b/test/DebugInfo/X86/empty-array.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -O0 -filetype=obj -o %t < %s
+; RUN: llvm-dwarfdump %t | FileCheck %s
+; <rdar://problem/12566646>
+
+%class.A = type { [0 x i32] }
+
+@a = global %class.A zeroinitializer, align 4
+
+; CHECK:      0x0000002d:   DW_TAG_base_type [3]  
+; CHECK-NEXT: 0x0000002e:     DW_AT_byte_size [DW_FORM_data1]  (0x04)
+; CHECK-NEXT: 0x0000002f:     DW_AT_encoding [DW_FORM_data1]   (0x05)
+
+; CHECK:      0x00000030:   DW_TAG_array_type [4] *
+; CHECK-NEXT: 0x00000031:     DW_AT_type [DW_FORM_ref4]    (cu + 0x0026 => {0x00000026})
+
+; CHECK:      0x00000035:     DW_TAG_subrange_type [5]  
+; CHECK-NEXT: 0x00000036:       DW_AT_type [DW_FORM_ref4]  (cu + 0x002d => {0x0000002d})
+; CHECK-NOT:  DW_AT_upper_bound
+
+; CHECK:      0x00000048:     DW_TAG_member [8]  
+; CHECK-NEXT: 0x00000049:       DW_AT_name [DW_FORM_strp]  ( .debug_str[0x0000003f] = "x")
+; CHECK-NEXT: 0x0000004d:       DW_AT_type [DW_FORM_ref4]  (cu + 0x0030 => {0x00000030})
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.3 (trunk 169136)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!6 = metadata !{i32 786473, metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !14}
+!9 = metadata !{i32 786445, metadata !7, metadata !"x", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
+!10 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786465, i64 0, i64 -1} ; [ DW_TAG_subrange_type ] [unbound]
+!14 = metadata !{i32 786478, i32 0, metadata !7, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A]
+!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{null, metadata !17}
+!17 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
diff --git a/test/DebugInfo/X86/fission-cu.ll b/test/DebugInfo/X86/fission-cu.ll
new file mode 100644
index 0000000000..fe4d5b0d52
--- /dev/null
+++ b/test/DebugInfo/X86/fission-cu.ll
@@ -0,0 +1,26 @@
+; RUN: llc -split-dwarf=Enable -O0 %s -mtriple=x86_64-unknown-linux-gnu -filetype=obj -o %t
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+@a = common global i32 0, align 4
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"baz.c", metadata !"/usr/local/google/home/echristo/tmp", metadata !"clang version 3.3 (trunk 169021) (llvm/trunk 169020)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/usr/local/google/home/echristo/tmp/baz.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, i32* @a} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!6 = metadata !{i32 786473, metadata !"baz.c", metadata !"/usr/local/google/home/echristo/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+
+; Check that the skeleton compile unit contains the proper attributes:
+; This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
+; DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
+; DW_AT_ranges_base, DW_AT_addr_base.
+
+; CHECK: DW_TAG_compile_unit
+; CHECK: DW_AT_GNU_dwo_name [DW_FORM_strp] ( .debug_str[0x00000035] = "baz.c")
+; CHECK: DW_AT_low_pc [DW_FORM_addr]       (0x0000000000000000)
+; CHECK: DW_AT_stmt_list [DW_FORM_data4]   (0x00000000)
+; CHECK: DW_AT_comp_dir [DW_FORM_strp]     ( .debug_str[0x0000003b] = "/usr/local/google/home/echristo/tmp")
diff --git a/test/DebugInfo/X86/nondefault-subrange-array.ll b/test/DebugInfo/X86/nondefault-subrange-array.ll
new file mode 100644
index 0000000000..5845f3e0b8
--- /dev/null
+++ b/test/DebugInfo/X86/nondefault-subrange-array.ll
@@ -0,0 +1,48 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -O0 -filetype=obj -o %t < %s
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+%class.A = type { [42 x i32] }
+
+@a = global %class.A zeroinitializer, align 4
+
+; Check that we can handle non-default array bounds. In this case, the array
+; goes from [-3, 38].
+
+; CHECK:      0x0000002d:   DW_TAG_base_type [3]  
+; CHECK-NEXT: 0x0000002e:     DW_AT_byte_size [DW_FORM_data1]  (0x04)
+; CHECK-NEXT: 0x0000002f:     DW_AT_encoding [DW_FORM_data1]   (0x05)
+
+; CHECK:      0x00000030:   DW_TAG_array_type [4] *
+; CHECK-NEXT: 0x00000031:     DW_AT_type [DW_FORM_ref4]    (cu + 0x0026 => {0x00000026})
+
+; CHECK:      0x00000035:     DW_TAG_subrange_type [5]
+; CHECK-NEXT: 0x00000036:       DW_AT_type [DW_FORM_ref4]  (cu + 0x002d => {0x0000002d})
+; CHECK-NEXT: 0x0000003a:       DW_AT_lower_bound [DW_FORM_data8]	(0xfffffffffffffffd)
+; CHECK-NEXT: 0x00000042:       DW_AT_upper_bound [DW_FORM_data1]	(0x26)
+
+; CHECK:      0x00000051:     DW_TAG_member [8]  
+; CHECK-NEXT: 0x00000052:       DW_AT_name [DW_FORM_strp]	( .debug_str[0x0000003f] = "x")
+; CHECK-NEXT: 0x00000056:       DW_AT_type [DW_FORM_ref4]	(cu + 0x0030 => {0x00000030})
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm", metadata !"clang version 3.3 (trunk 169136)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !1, metadata !3} ; [ DW_TAG_compile_unit ] [/Volumes/Sandbox/llvm/t.cpp] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786484, i32 0, null, metadata !"a", metadata !"a", metadata !"", metadata !6, i32 1, metadata !7, i32 0, i32 1, %class.A* @a} ; [ DW_TAG_variable ] [a] [line 1] [def]
+!6 = metadata !{i32 786473, metadata !"t.cpp", metadata !"/Volumes/Sandbox/llvm", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786434, null, metadata !"A", metadata !6, i32 1, i64 0, i64 32, i32 0, i32 0, null, metadata !8, i32 0, null, null} ; [ DW_TAG_class_type ] [A] [line 1, size 0, align 32, offset 0] [from ]
+!8 = metadata !{metadata !9, metadata !14}
+!9 = metadata !{i32 786445, metadata !7, metadata !"x", metadata !6, i32 1, i64 0, i64 0, i64 0, i32 1, metadata !10} ; [ DW_TAG_member ] [x] [line 1, size 0, align 0, offset 0] [private] [from ]
+!10 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !11, metadata !12, i32 0, i32 0} ; [ DW_TAG_array_type ] [line 0, size 0, align 32, offset 0] [from int]
+!11 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!12 = metadata !{metadata !13}
+!13 = metadata !{i32 786465, i64 -3, i64 42} ; [ DW_TAG_subrange_type ] [-3, 39]
+!14 = metadata !{i32 786478, i32 0, metadata !7, metadata !"A", metadata !"A", metadata !"", metadata !6, i32 1, metadata !15, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !18, i32 1} ; [ DW_TAG_subprogram ] [line 1] [A]
+!15 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !16, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!16 = metadata !{null, metadata !17}
+!17 = metadata !{i32 786447, i32 0, metadata !"", i32 0, i32 0, i64 64, i64 64, i64 0, i32 1088, metadata !7} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from A]
+!18 = metadata !{metadata !19}
+!19 = metadata !{i32 786468}                      ; [ DW_TAG_base_type ] [line 0, size 0, align 0, offset 0]
diff --git a/test/DebugInfo/X86/op_deref.ll b/test/DebugInfo/X86/op_deref.ll
index c84b2e6931..b0b09e74f1 100644
--- a/test/DebugInfo/X86/op_deref.ll
+++ b/test/DebugInfo/X86/op_deref.ll
@@ -76,7 +76,7 @@ declare void @llvm.stackrestore(i8*) nounwind
 !14 = metadata !{i32 786688, metadata !13, metadata !"vla", metadata !6, i32 3, metadata !15, i32 0, i32 0, i64 2} ; [ DW_TAG_auto_variable ]
 !15 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 0, i64 32, i32 0, i32 0, metadata !9, metadata !16, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !16 = metadata !{metadata !17}
-!17 = metadata !{i32 786465, i64 1, i64 0}        ; [ DW_TAG_subrange_type ]
+!17 = metadata !{i32 786465, i64 0, i64 -1}        ; [ DW_TAG_subrange_type ]
 !18 = metadata !{i32 3, i32 7, metadata !13, null}
 !19 = metadata !{i32 786688, metadata !13, metadata !"i", metadata !6, i32 4, metadata !9, i32 0, i32 0} ; [ DW_TAG_auto_variable ]
 !20 = metadata !{i32 4, i32 7, metadata !13, null}
diff --git a/test/DebugInfo/X86/pr13303.ll b/test/DebugInfo/X86/pr13303.ll
new file mode 100644
index 0000000000..e820cb564c
--- /dev/null
+++ b/test/DebugInfo/X86/pr13303.ll
@@ -0,0 +1,28 @@
+; RUN: llc %s -o %t -filetype=obj -mtriple=x86_64-unknown-linux-gnu
+; RUN: llvm-dwarfdump %t | FileCheck %s
+; PR13303
+
+; Check that the prologue ends with is_stmt here.
+; CHECK: 0x0000000000000000 {{.*}} is_stmt
+
+define i32 @main() nounwind uwtable {
+entry:
+  %retval = alloca i32, align 4
+  store i32 0, i32* %retval
+  ret i32 0, !dbg !10
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"PR13303.c", metadata !"/home/probinson", metadata !"clang version 3.2 (trunk 160143)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/probinson/PR13303.c] [DW_LANG_C99]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 1, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 0, i1 false, i32 ()* @main, null, null, metadata !1, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [main]
+!6 = metadata !{i32 786473, metadata !"PR13303.c", metadata !"/home/probinson", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{metadata !9}
+!9 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
+!10 = metadata !{i32 1, i32 14, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !5, i32 1, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/probinson/PR13303.c]
diff --git a/test/DebugInfo/array.ll b/test/DebugInfo/array.ll
index 9f592a12a9..d1671ad2fd 100644
--- a/test/DebugInfo/array.ll
+++ b/test/DebugInfo/array.ll
@@ -29,6 +29,6 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 ;CHECK-NOT: DW_AT_lower_bound
 ;CHECK-NOT: DW_AT_upper_bound
 ;CHECK-NEXT: End Of Children Mark
-!10 = metadata !{i32 589857, i64 1, i64 0}        ; [ DW_TAG_subrange_type ]
+!10 = metadata !{i32 589857, i64 0, i64 -1}        ; [ DW_TAG_subrange_type ]
 !11 = metadata !{i32 4, i32 7, metadata !7, null}
 !12 = metadata !{i32 5, i32 3, metadata !7, null}
diff --git a/test/DebugInfo/two-cus-from-same-file.ll b/test/DebugInfo/two-cus-from-same-file.ll
new file mode 100644
index 0000000000..d01aeea3bf
--- /dev/null
+++ b/test/DebugInfo/two-cus-from-same-file.ll
@@ -0,0 +1,72 @@
+; For http://llvm.org/bugs/show_bug.cgi?id=12942
+;   There are two CUs coming from /tmp/foo.c in this module. Make sure it doesn't
+;   blow llc up and produces something reasonable.
+;
+
+; RUN: llc %s -o %t -filetype=obj -O0
+; RUN: llvm-dwarfdump %t | FileCheck %s
+
+; ModuleID = 'test.bc'
+
+@str = private unnamed_addr constant [4 x i8] c"FOO\00"
+@str1 = private unnamed_addr constant [6 x i8] c"Main!\00"
+
+define void @foo() nounwind {
+entry:
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([4 x i8]* @str, i32 0, i32 0)), !dbg !23
+  ret void, !dbg !25
+}
+
+declare i32 @puts(i8* nocapture) nounwind
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !21), !dbg !26
+  tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !22), !dbg !27
+  %puts = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @str1, i32 0, i32 0)), !dbg !28
+  tail call void @foo() nounwind, !dbg !30
+  ret i32 0, !dbg !31
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.cu = !{!0, !9}
+
+!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"foo.c", metadata !"/tmp", metadata !"clang version 3.2 (trunk 156513)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"foo", metadata !"foo", metadata !"", metadata !6, i32 5, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void ()* @foo, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ]
+!6 = metadata !{i32 786473, metadata !"foo.c", metadata !"/tmp", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !8, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!8 = metadata !{null}
+!9 = metadata !{i32 786449, i32 0, i32 12, metadata !"foo.c", metadata !"/tmp", metadata !"clang version 3.2 (trunk 156513)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !10, metadata !1} ; [ DW_TAG_compile_unit ]
+!10 = metadata !{metadata !11}
+!11 = metadata !{metadata !12}
+!12 = metadata !{i32 786478, i32 0, metadata !6, metadata !"main", metadata !"main", metadata !"", metadata !6, i32 11, metadata !13, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main, null, null, metadata !19, i32 11} ; [ DW_TAG_subprogram ]
+!13 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !14, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!14 = metadata !{metadata !15, metadata !15, metadata !16}
+!15 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!16 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !17} ; [ DW_TAG_pointer_type ]
+!17 = metadata !{i32 786447, null, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !18} ; [ DW_TAG_pointer_type ]
+!18 = metadata !{i32 786468, null, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!19 = metadata !{metadata !20}
+!20 = metadata !{metadata !21, metadata !22}
+!21 = metadata !{i32 786689, metadata !12, metadata !"argc", metadata !6, i32 16777227, metadata !15, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!22 = metadata !{i32 786689, metadata !12, metadata !"argv", metadata !6, i32 33554443, metadata !16, i32 0, i32 0} ; [ DW_TAG_arg_variable ]
+!23 = metadata !{i32 6, i32 3, metadata !24, null}
+!24 = metadata !{i32 786443, metadata !5, i32 5, i32 16, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!25 = metadata !{i32 7, i32 1, metadata !24, null}
+!26 = metadata !{i32 11, i32 14, metadata !12, null}
+!27 = metadata !{i32 11, i32 26, metadata !12, null}
+!28 = metadata !{i32 12, i32 3, metadata !29, null}
+!29 = metadata !{i32 786443, metadata !12, i32 11, i32 34, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
+!30 = metadata !{i32 13, i32 3, metadata !29, null}
+!31 = metadata !{i32 14, i32 3, metadata !29, null}
+
+; This test is simple to be cross platform (many targets don't yet have
+; sufficiently good DWARF emission and/or dumping)
+; CHECK: {{DW_TAG_compile_unit}}
+; CHECK: {{foo\.c}}
+
diff --git a/test/Feature/properties.ll b/test/Feature/properties.ll
index c688d689be..2111fa2f8d 100644
--- a/test/Feature/properties.ll
+++ b/test/Feature/properties.ll
@@ -4,4 +4,3 @@
 
 target datalayout = "e-p:32:32"
 target triple = "proc-vend-sys"
-deplibs = [ "m", "c" ]
diff --git a/test/FileCheck/regex-brackets.txt b/test/FileCheck/regex-brackets.txt
new file mode 100644
index 0000000000..fd8568d3a5
--- /dev/null
+++ b/test/FileCheck/regex-brackets.txt
@@ -0,0 +1,7 @@
+// RUN: FileCheck -input-file %s %s
+
+op r1
+op r2, [x r1]
+; CHECK:        op [[REG:r[0-9]]]
+; CHECK:        op [[REG2:r[0-9]]], [x [[REG]]]
+
diff --git a/test/FileCheck/simple-var-capture.txt b/test/FileCheck/simple-var-capture.txt
index c0214d9017..a487baaa53 100644
--- a/test/FileCheck/simple-var-capture.txt
+++ b/test/FileCheck/simple-var-capture.txt
@@ -2,7 +2,7 @@
 
 op1 r1
 op2 r1, r2
-; CHECK:        op1 [[REG:r[0-9]+]]
+; CHECK:        op1 [[REG:r[0-9]]]
 ; CHECK-NEXT:   op2 [[REG]]
 
 op3 r16, r18, r21
@@ -10,3 +10,4 @@ op4 r30, r18, r21
 ; CHECK:        op3 {{r[0-9]+}}, [[REGa:r[0-9]+]], [[REGb:r[0-9]+]]
 ; CHECK-NEXT:   op4 {{r[0-9]+}}, [[REGa]], [[REGb]]
 
+
diff --git a/test/FileCheck/two-checks-for-same-match.txt b/test/FileCheck/two-checks-for-same-match.txt
new file mode 100644
index 0000000000..2195aa96c5
--- /dev/null
+++ b/test/FileCheck/two-checks-for-same-match.txt
@@ -0,0 +1,8 @@
+// Check that two distinct CHECK lines won't match the same string
+// RUN: not FileCheck -input-file %s %s
+
+; CHECK: {{a[0-9]b}}
+; CHECK: {{a[0-9]b}}
+
+a2b
+
diff --git a/test/FileCheck/var-ref-same-line.txt b/test/FileCheck/var-ref-same-line.txt
new file mode 100644
index 0000000000..1755cefbf8
--- /dev/null
+++ b/test/FileCheck/var-ref-same-line.txt
@@ -0,0 +1,16 @@
+// Test for referencing a variable defined on the same line
+// RUN: FileCheck -input-file %s %s
+
+op1 r1, r2, r1
+
+; CHECK: op1 [[REG:r[0-9]+]], {{r[0-9]+}}, [[REG]]
+
+op3 r1, r2, r1, r2
+
+; CHECK: op3 [[REG1:r[0-9]+]], [[REG2:r[0-9]+]], [[REG1]], [[REG2]]
+
+op4 g1, g2, g1
+
+; Test that parens inside the regex don't confuse FileCheck
+; CHECK: {{([a-z]+[0-9])+}} [[REG:g[0-9]+]], {{g[0-9]+}}, [[REG]]
+
diff --git a/test/Instrumentation/AddressSanitizer/instrument-no-return.ll b/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
index 80f1b1c74c..e8f62b5485 100644
--- a/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument-no-return.ll
@@ -7,11 +7,22 @@ target triple = "x86_64-unknown-linux-gnu"
 
 declare void @MyNoReturnFunc(i32) noreturn
 
-define i32 @_Z5ChildPv(i8* nocapture %arg) uwtable address_safety {
+define i32 @Call1(i8* nocapture %arg) uwtable address_safety {
 entry:
-  call void @MyNoReturnFunc(i32 1) noreturn
+  call void @MyNoReturnFunc(i32 1) noreturn  ; The call insn has noreturn attr.
+; CHECK:        @Call1
+; CHECK:        call void @__asan_handle_no_return
+; CHECK-NEXT:   call void @MyNoReturnFunc
+; CHECK-NEXT: unreachable
   unreachable
 }
 
+define i32 @Call2(i8* nocapture %arg) uwtable address_safety {
+entry:
+  call void @MyNoReturnFunc(i32 1)  ; No noreturn attribure on the call.
+; CHECK:        @Call2
 ; CHECK:        call void @__asan_handle_no_return
 ; CHECK-NEXT:   call void @MyNoReturnFunc
+; CHECK-NEXT: unreachable
+  unreachable
+}
diff --git a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll
index 3d92946087..2c183f523f 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_global.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -asan -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
 @xxx = global i32 0, align 4
diff --git a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
index c11a0498c3..042c06ba96 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -asan -asan-initialization-order -S | FileCheck %s
+; RUN: opt < %s -asan -asan-module -asan-initialization-order -S | FileCheck %s
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-linux-gnu"
 @xxx = internal global i32 0, align 4  ; With dynamic initializer.
diff --git a/test/Instrumentation/AddressSanitizer/lifetime.ll b/test/Instrumentation/AddressSanitizer/lifetime.ll
new file mode 100644
index 0000000000..55cd475f1f
--- /dev/null
+++ b/test/Instrumentation/AddressSanitizer/lifetime.ll
@@ -0,0 +1,61 @@
+; Test hanlding of llvm.lifetime intrinsics.
+; RUN: opt < %s -asan -asan-check-lifetime -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+
+define void @lifetime_no_size() address_safety {
+entry:
+  %i = alloca i32, align 4
+  %i.ptr = bitcast i32* %i to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %i.ptr)
+  call void @llvm.lifetime.end(i64 -1, i8* %i.ptr)
+
+; Check that lifetime with no size are ignored.
+; CHECK: @lifetime_no_size
+; CHECK-NOT: @__asan_poison_stack_memory
+; CHECK-NOT: @__asan_unpoison_stack_memory
+; CHECK: ret void
+  ret void
+}
+
+; Generic case of lifetime analysis.
+define void @lifetime() address_safety {
+  ; CHECK: @lifetime
+
+  ; Regular variable lifetime intrinsics.
+  %i = alloca i32, align 4
+  %i.ptr = bitcast i32* %i to i8*
+  call void @llvm.lifetime.start(i64 3, i8* %i.ptr)
+  ; Memory is unpoisoned at llvm.lifetime.start
+  ; CHECK: %[[VAR:[^ ]*]] = ptrtoint i8* %i.ptr to i64
+  ; CHECK-NEXT: call void @__asan_unpoison_stack_memory(i64 %[[VAR]], i64 3)
+  call void @llvm.lifetime.end(i64 4, i8* %i.ptr)
+  call void @llvm.lifetime.end(i64 2, i8* %i.ptr)
+  ; Memory is poisoned at every call to llvm.lifetime.end
+  ; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 4)
+  ; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 2)
+
+  ; Lifetime intrinsics for array.
+  %arr = alloca [10 x i32], align 16
+  %arr.ptr = bitcast [10 x i32]* %arr to i8*
+  call void @llvm.lifetime.start(i64 40, i8* %arr.ptr)
+  ; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 40)
+  call void @llvm.lifetime.end(i64 40, i8* %arr.ptr)
+  ; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 40)
+
+  ; One more lifetime start/end for the same variable %i.
+  call void @llvm.lifetime.start(i64 4, i8* %i.ptr)
+  ; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 4)
+  call void @llvm.lifetime.end(i64 4, i8* %i.ptr)
+  ; CHECK: call void @__asan_poison_stack_memory(i64 %{{[^ ]+}}, i64 4)
+
+  ; Memory is unpoisoned at function exit (only once).
+  ; CHECK: call void @__asan_unpoison_stack_memory(i64 %{{[^ ]+}}, i64 {{.*}})
+  ; CHECK-NOT: @__asan_unpoison_stack_memory
+  ; CHECK: ret void
+  ret void
+}
diff --git a/test/Instrumentation/MemorySanitizer/lit.local.cfg b/test/Instrumentation/MemorySanitizer/lit.local.cfg
new file mode 100644
index 0000000000..19eebc0ac7
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Instrumentation/MemorySanitizer/msan_basic.ll b/test/Instrumentation/MemorySanitizer/msan_basic.ll
new file mode 100644
index 0000000000..3228863193
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -0,0 +1,395 @@
+; RUN: opt < %s -msan -S | FileCheck %s
+; RUN: opt < %s -msan -msan-track-origins=1 -S | FileCheck -check-prefix=CHECK-ORIGINS %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Check the presence of __msan_init
+; CHECK: @llvm.global_ctors {{.*}} @__msan_init
+
+; Check the presence and the linkage type of __msan_track_origins
+; CHECK: @__msan_track_origins = weak_odr constant i32 0
+
+; Check instrumentation of stores
+define void @Store(i32* nocapture %p, i32 %x) nounwind uwtable {
+entry:
+  store i32 %x, i32* %p, align 4
+  ret void
+}
+
+; CHECK: @Store
+; CHECK: load {{.*}} @__msan_param_tls
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+; CHECK-ORIGINS: @Store
+; CHECK-ORIGINS: load {{.*}} @__msan_param_tls
+; CHECK-ORIGINS: store
+; CHECK-ORIGINS: icmp
+; CHECK-ORIGINS: br i1
+; CHECK-ORIGINS: <label>
+; CHECK-ORIGINS: store
+; CHECK-ORIGINS: br label
+; CHECK-ORIGINS: <label>
+; CHECK-ORIGINS: store
+; CHECK-ORIGINS: ret void
+
+
+; load followed by cmp: check that we load the shadow and call __msan_warning.
+define void @LoadAndCmp(i32* nocapture %a) nounwind uwtable {
+entry:
+  %0 = load i32* %a, align 4
+  %tobool = icmp eq i32 %0, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  tail call void (...)* @foo() nounwind
+  br label %if.end
+
+if.end:                                           ; preds = %entry, %if.then
+  ret void
+}
+
+declare void @foo(...)
+
+; CHECK: @LoadAndCmp
+; CHECK: = load
+; CHECK: = load
+; CHECK: call void @__msan_warning_noreturn()
+; CHECK-NEXT: call void asm sideeffect
+; CHECK-NEXT: unreachable
+; CHECK: ret void
+
+; Check that we store the shadow for the retval.
+define i32 @ReturnInt() nounwind uwtable readnone {
+entry:
+  ret i32 123
+}
+
+; CHECK: @ReturnInt
+; CHECK: store i32 0,{{.*}}__msan_retval_tls
+; CHECK: ret i32
+
+; Check that we get the shadow for the retval.
+define void @CopyRetVal(i32* nocapture %a) nounwind uwtable {
+entry:
+  %call = tail call i32 @ReturnInt() nounwind
+  store i32 %call, i32* %a, align 4
+  ret void
+}
+
+; CHECK: @CopyRetVal
+; CHECK: load{{.*}}__msan_retval_tls
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+
+
+; Check that we generate PHIs for shadow.
+define void @FuncWithPhi(i32* nocapture %a, i32* %b, i32* nocapture %c) nounwind uwtable {
+entry:
+  %tobool = icmp eq i32* %b, null
+  br i1 %tobool, label %if.else, label %if.then
+
+  if.then:                                          ; preds = %entry
+  %0 = load i32* %b, align 4
+  br label %if.end
+
+  if.else:                                          ; preds = %entry
+  %1 = load i32* %c, align 4
+  br label %if.end
+
+  if.end:                                           ; preds = %if.else, %if.then
+  %t.0 = phi i32 [ %0, %if.then ], [ %1, %if.else ]
+  store i32 %t.0, i32* %a, align 4
+  ret void
+}
+
+; CHECK: @FuncWithPhi
+; CHECK: = phi
+; CHECK-NEXT: = phi
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+
+; Compute shadow for "x << 10"
+define void @ShlConst(i32* nocapture %x) nounwind uwtable {
+entry:
+  %0 = load i32* %x, align 4
+  %1 = shl i32 %0, 10
+  store i32 %1, i32* %x, align 4
+  ret void
+}
+
+; CHECK: @ShlConst
+; CHECK: = load
+; CHECK: = load
+; CHECK: shl
+; CHECK: shl
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+
+; Compute shadow for "10 << x": it should have 'sext i1'.
+define void @ShlNonConst(i32* nocapture %x) nounwind uwtable {
+entry:
+  %0 = load i32* %x, align 4
+  %1 = shl i32 10, %0
+  store i32 %1, i32* %x, align 4
+  ret void
+}
+
+; CHECK: @ShlNonConst
+; CHECK: = load
+; CHECK: = load
+; CHECK: = sext i1
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+
+; SExt
+define void @SExt(i32* nocapture %a, i16* nocapture %b) nounwind uwtable {
+entry:
+  %0 = load i16* %b, align 2
+  %1 = sext i16 %0 to i32
+  store i32 %1, i32* %a, align 4
+  ret void
+}
+
+; CHECK: @SExt
+; CHECK: = load
+; CHECK: = load
+; CHECK: = sext
+; CHECK: = sext
+; CHECK: store
+; CHECK: store
+; CHECK: ret void
+
+
+; memset
+define void @MemSet(i8* nocapture %x) nounwind uwtable {
+entry:
+  call void @llvm.memset.p0i8.i64(i8* %x, i8 42, i64 10, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+; CHECK: @MemSet
+; CHECK: call i8* @__msan_memset
+; CHECK: ret void
+
+
+; memcpy
+define void @MemCpy(i8* nocapture %x, i8* nocapture %y) nounwind uwtable {
+entry:
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %x, i8* %y, i64 10, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+; CHECK: @MemCpy
+; CHECK: call i8* @__msan_memcpy
+; CHECK: ret void
+
+
+; memmove is lowered to a call
+define void @MemMove(i8* nocapture %x, i8* nocapture %y) nounwind uwtable {
+entry:
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* %x, i8* %y, i64 10, i32 1, i1 false)
+  ret void
+}
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+; CHECK: @MemMove
+; CHECK: call i8* @__msan_memmove
+; CHECK: ret void
+
+
+; Check that we propagate shadow for "select"
+
+define i32 @Select(i32 %a, i32 %b, i32 %c) nounwind uwtable readnone {
+entry:
+  %tobool = icmp ne i32 %c, 0
+  %cond = select i1 %tobool, i32 %a, i32 %b
+  ret i32 %cond
+}
+
+; CHECK: @Select
+; CHECK: select
+; CHECK-NEXT: select
+; CHECK: ret i32
+
+
+define i8* @IntToPtr(i64 %x) nounwind uwtable readnone {
+entry:
+  %0 = inttoptr i64 %x to i8*
+  ret i8* %0
+}
+
+; CHECK: @IntToPtr
+; CHECK: load i64*{{.*}}__msan_param_tls
+; CHECK-NEXT: inttoptr
+; CHECK-NEXT: store i64{{.*}}__msan_retval_tls
+; CHECK: ret i8
+
+
+define i8* @IntToPtr_ZExt(i16 %x) nounwind uwtable readnone {
+entry:
+  %0 = inttoptr i16 %x to i8*
+  ret i8* %0
+}
+
+; CHECK: @IntToPtr_ZExt
+; CHECK: zext
+; CHECK-NEXT: inttoptr
+; CHECK: ret i8
+
+
+; Check that we insert exactly one check on udiv
+; (2nd arg shadow is checked, 1st arg shadow is propagated)
+
+define i32 @Div(i32 %a, i32 %b) nounwind uwtable readnone {
+entry:
+  %div = udiv i32 %a, %b
+  ret i32 %div
+}
+
+; CHECK: @Div
+; CHECK: icmp
+; CHECK: call void @__msan_warning
+; CHECK-NOT: icmp
+; CHECK: udiv
+; CHECK-NOT: icmp
+; CHECK: ret i32
+
+
+; Check that we propagate shadow for x<0, x>=0, etc (i.e. sign bit tests)
+
+define zeroext i1 @ICmpSLT(i32 %x) nounwind uwtable readnone {
+  %1 = icmp slt i32 %x, 0
+  ret i1 %1
+}
+
+; CHECK: @ICmpSLT
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+define zeroext i1 @ICmpSGE(i32 %x) nounwind uwtable readnone {
+  %1 = icmp sge i32 %x, 0
+  ret i1 %1
+}
+
+; CHECK: @ICmpSGE
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp sge
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+define zeroext i1 @ICmpSGT(i32 %x) nounwind uwtable readnone {
+  %1 = icmp sgt i32 0, %x
+  ret i1 %1
+}
+
+; CHECK: @ICmpSGT
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp sgt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+define zeroext i1 @ICmpSLE(i32 %x) nounwind uwtable readnone {
+  %1 = icmp sle i32 0, %x
+  ret i1 %1
+}
+
+; CHECK: @ICmpSLE
+; CHECK: icmp slt
+; CHECK-NOT: call void @__msan_warning
+; CHECK: icmp sle
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i1
+
+
+; Check that loads from shadow have the same aligment as the original loads.
+
+define i32 @ShadowLoadAlignmentLarge() nounwind uwtable {
+  %y = alloca i32, align 64
+  %1 = load volatile i32* %y, align 64
+  ret i32 %1
+}
+
+; CHECK: @ShadowLoadAlignmentLarge
+; CHECK: load i32* {{.*}} align 64
+; CHECK: load volatile i32* {{.*}} align 64
+; CHECK: ret i32
+
+define i32 @ShadowLoadAlignmentSmall() nounwind uwtable {
+  %y = alloca i32, align 2
+  %1 = load volatile i32* %y, align 2
+  ret i32 %1
+}
+
+; CHECK: @ShadowLoadAlignmentSmall
+; CHECK: load i32* {{.*}} align 2
+; CHECK: load volatile i32* {{.*}} align 2
+; CHECK: ret i32
+
+
+; Test vector manipulation instructions.
+; Check that the same bit manipulation is applied to the shadow values.
+; Check that there is a zero test of the shadow of %idx argument, where present.
+
+define i32 @ExtractElement(<4 x i32> %vec, i32 %idx) {
+  %x = extractelement <4 x i32> %vec, i32 %idx
+  ret i32 %x
+}
+
+; CHECK: @ExtractElement
+; CHECK: extractelement
+; CHECK: call void @__msan_warning
+; CHECK: extractelement
+; CHECK: ret i32
+
+define <4 x i32> @InsertElement(<4 x i32> %vec, i32 %idx, i32 %x) {
+  %vec1 = insertelement <4 x i32> %vec, i32 %x, i32 %idx
+  ret <4 x i32> %vec1
+}
+
+; CHECK: @InsertElement
+; CHECK: insertelement
+; CHECK: call void @__msan_warning
+; CHECK: insertelement
+; CHECK: ret <4 x i32>
+
+define <4 x i32> @ShuffleVector(<4 x i32> %vec, <4 x i32> %vec1) {
+  %vec2 = shufflevector <4 x i32> %vec, <4 x i32> %vec1,
+                        <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  ret <4 x i32> %vec2
+}
+
+; CHECK: @ShuffleVector
+; CHECK: shufflevector
+; CHECK-NOT: call void @__msan_warning
+; CHECK: shufflevector
+; CHECK: ret <4 x i32>
+
+; Test bswap intrinsic instrumentation
+define i32 @BSwap(i32 %x) nounwind uwtable readnone {
+  %y = tail call i32 @llvm.bswap.i32(i32 %x)
+  ret i32 %y
+}
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
+
+; CHECK: @BSwap
+; CHECK-NOT: call void @__msan_warning
+; CHECK: @llvm.bswap.i32
+; CHECK-NOT: call void @__msan_warning
+; CHECK: @llvm.bswap.i32
+; CHECK-NOT: call void @__msan_warning
+; CHECK: ret i32
diff --git a/test/Instrumentation/ThreadSanitizer/atomic.ll b/test/Instrumentation/ThreadSanitizer/atomic.ll
index d9fc222f12..70b6cbbf31 100644
--- a/test/Instrumentation/ThreadSanitizer/atomic.ll
+++ b/test/Instrumentation/ThreadSanitizer/atomic.ll
@@ -114,6 +114,14 @@ entry:
 ; CHECK: atomic8_xor_monotonic
 ; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 0)
 
+define void @atomic8_nand_monotonic(i8* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i8* %a, i8 0 monotonic
+  ret void
+}
+; CHECK: atomic8_nand_monotonic
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 0)
+
 define void @atomic8_xchg_acquire(i8* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i8* %a, i8 0 acquire
@@ -162,6 +170,14 @@ entry:
 ; CHECK: atomic8_xor_acquire
 ; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 2)
 
+define void @atomic8_nand_acquire(i8* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i8* %a, i8 0 acquire
+  ret void
+}
+; CHECK: atomic8_nand_acquire
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 2)
+
 define void @atomic8_xchg_release(i8* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i8* %a, i8 0 release
@@ -210,6 +226,14 @@ entry:
 ; CHECK: atomic8_xor_release
 ; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 3)
 
+define void @atomic8_nand_release(i8* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i8* %a, i8 0 release
+  ret void
+}
+; CHECK: atomic8_nand_release
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 3)
+
 define void @atomic8_xchg_acq_rel(i8* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i8* %a, i8 0 acq_rel
@@ -258,6 +282,14 @@ entry:
 ; CHECK: atomic8_xor_acq_rel
 ; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 4)
 
+define void @atomic8_nand_acq_rel(i8* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i8* %a, i8 0 acq_rel
+  ret void
+}
+; CHECK: atomic8_nand_acq_rel
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 4)
+
 define void @atomic8_xchg_seq_cst(i8* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i8* %a, i8 0 seq_cst
@@ -306,6 +338,14 @@ entry:
 ; CHECK: atomic8_xor_seq_cst
 ; CHECK: call i8 @__tsan_atomic8_fetch_xor(i8* %a, i8 0, i32 5)
 
+define void @atomic8_nand_seq_cst(i8* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i8* %a, i8 0 seq_cst
+  ret void
+}
+; CHECK: atomic8_nand_seq_cst
+; CHECK: call i8 @__tsan_atomic8_fetch_nand(i8* %a, i8 0, i32 5)
+
 define void @atomic8_cas_monotonic(i8* %a) nounwind uwtable {
 entry:
   cmpxchg i8* %a, i8 0, i8 1 monotonic
@@ -458,6 +498,14 @@ entry:
 ; CHECK: atomic16_xor_monotonic
 ; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 0)
 
+define void @atomic16_nand_monotonic(i16* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i16* %a, i16 0 monotonic
+  ret void
+}
+; CHECK: atomic16_nand_monotonic
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 0)
+
 define void @atomic16_xchg_acquire(i16* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i16* %a, i16 0 acquire
@@ -506,6 +554,14 @@ entry:
 ; CHECK: atomic16_xor_acquire
 ; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 2)
 
+define void @atomic16_nand_acquire(i16* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i16* %a, i16 0 acquire
+  ret void
+}
+; CHECK: atomic16_nand_acquire
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 2)
+
 define void @atomic16_xchg_release(i16* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i16* %a, i16 0 release
@@ -554,6 +610,14 @@ entry:
 ; CHECK: atomic16_xor_release
 ; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 3)
 
+define void @atomic16_nand_release(i16* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i16* %a, i16 0 release
+  ret void
+}
+; CHECK: atomic16_nand_release
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 3)
+
 define void @atomic16_xchg_acq_rel(i16* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i16* %a, i16 0 acq_rel
@@ -602,6 +666,14 @@ entry:
 ; CHECK: atomic16_xor_acq_rel
 ; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 4)
 
+define void @atomic16_nand_acq_rel(i16* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i16* %a, i16 0 acq_rel
+  ret void
+}
+; CHECK: atomic16_nand_acq_rel
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 4)
+
 define void @atomic16_xchg_seq_cst(i16* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i16* %a, i16 0 seq_cst
@@ -650,6 +722,14 @@ entry:
 ; CHECK: atomic16_xor_seq_cst
 ; CHECK: call i16 @__tsan_atomic16_fetch_xor(i16* %a, i16 0, i32 5)
 
+define void @atomic16_nand_seq_cst(i16* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i16* %a, i16 0 seq_cst
+  ret void
+}
+; CHECK: atomic16_nand_seq_cst
+; CHECK: call i16 @__tsan_atomic16_fetch_nand(i16* %a, i16 0, i32 5)
+
 define void @atomic16_cas_monotonic(i16* %a) nounwind uwtable {
 entry:
   cmpxchg i16* %a, i16 0, i16 1 monotonic
@@ -802,6 +882,14 @@ entry:
 ; CHECK: atomic32_xor_monotonic
 ; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 0)
 
+define void @atomic32_nand_monotonic(i32* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i32* %a, i32 0 monotonic
+  ret void
+}
+; CHECK: atomic32_nand_monotonic
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 0)
+
 define void @atomic32_xchg_acquire(i32* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i32* %a, i32 0 acquire
@@ -850,6 +938,14 @@ entry:
 ; CHECK: atomic32_xor_acquire
 ; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 2)
 
+define void @atomic32_nand_acquire(i32* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i32* %a, i32 0 acquire
+  ret void
+}
+; CHECK: atomic32_nand_acquire
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 2)
+
 define void @atomic32_xchg_release(i32* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i32* %a, i32 0 release
@@ -898,6 +994,14 @@ entry:
 ; CHECK: atomic32_xor_release
 ; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 3)
 
+define void @atomic32_nand_release(i32* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i32* %a, i32 0 release
+  ret void
+}
+; CHECK: atomic32_nand_release
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 3)
+
 define void @atomic32_xchg_acq_rel(i32* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i32* %a, i32 0 acq_rel
@@ -946,6 +1050,14 @@ entry:
 ; CHECK: atomic32_xor_acq_rel
 ; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 4)
 
+define void @atomic32_nand_acq_rel(i32* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i32* %a, i32 0 acq_rel
+  ret void
+}
+; CHECK: atomic32_nand_acq_rel
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 4)
+
 define void @atomic32_xchg_seq_cst(i32* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i32* %a, i32 0 seq_cst
@@ -994,6 +1106,14 @@ entry:
 ; CHECK: atomic32_xor_seq_cst
 ; CHECK: call i32 @__tsan_atomic32_fetch_xor(i32* %a, i32 0, i32 5)
 
+define void @atomic32_nand_seq_cst(i32* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i32* %a, i32 0 seq_cst
+  ret void
+}
+; CHECK: atomic32_nand_seq_cst
+; CHECK: call i32 @__tsan_atomic32_fetch_nand(i32* %a, i32 0, i32 5)
+
 define void @atomic32_cas_monotonic(i32* %a) nounwind uwtable {
 entry:
   cmpxchg i32* %a, i32 0, i32 1 monotonic
@@ -1146,6 +1266,14 @@ entry:
 ; CHECK: atomic64_xor_monotonic
 ; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 0)
 
+define void @atomic64_nand_monotonic(i64* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i64* %a, i64 0 monotonic
+  ret void
+}
+; CHECK: atomic64_nand_monotonic
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 0)
+
 define void @atomic64_xchg_acquire(i64* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i64* %a, i64 0 acquire
@@ -1194,6 +1322,14 @@ entry:
 ; CHECK: atomic64_xor_acquire
 ; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 2)
 
+define void @atomic64_nand_acquire(i64* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i64* %a, i64 0 acquire
+  ret void
+}
+; CHECK: atomic64_nand_acquire
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 2)
+
 define void @atomic64_xchg_release(i64* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i64* %a, i64 0 release
@@ -1242,6 +1378,14 @@ entry:
 ; CHECK: atomic64_xor_release
 ; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 3)
 
+define void @atomic64_nand_release(i64* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i64* %a, i64 0 release
+  ret void
+}
+; CHECK: atomic64_nand_release
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 3)
+
 define void @atomic64_xchg_acq_rel(i64* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i64* %a, i64 0 acq_rel
@@ -1290,6 +1434,14 @@ entry:
 ; CHECK: atomic64_xor_acq_rel
 ; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 4)
 
+define void @atomic64_nand_acq_rel(i64* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i64* %a, i64 0 acq_rel
+  ret void
+}
+; CHECK: atomic64_nand_acq_rel
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 4)
+
 define void @atomic64_xchg_seq_cst(i64* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i64* %a, i64 0 seq_cst
@@ -1338,6 +1490,14 @@ entry:
 ; CHECK: atomic64_xor_seq_cst
 ; CHECK: call i64 @__tsan_atomic64_fetch_xor(i64* %a, i64 0, i32 5)
 
+define void @atomic64_nand_seq_cst(i64* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i64* %a, i64 0 seq_cst
+  ret void
+}
+; CHECK: atomic64_nand_seq_cst
+; CHECK: call i64 @__tsan_atomic64_fetch_nand(i64* %a, i64 0, i32 5)
+
 define void @atomic64_cas_monotonic(i64* %a) nounwind uwtable {
 entry:
   cmpxchg i64* %a, i64 0, i64 1 monotonic
@@ -1490,6 +1650,14 @@ entry:
 ; CHECK: atomic128_xor_monotonic
 ; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 0)
 
+define void @atomic128_nand_monotonic(i128* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i128* %a, i128 0 monotonic
+  ret void
+}
+; CHECK: atomic128_nand_monotonic
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 0)
+
 define void @atomic128_xchg_acquire(i128* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i128* %a, i128 0 acquire
@@ -1538,6 +1706,14 @@ entry:
 ; CHECK: atomic128_xor_acquire
 ; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 2)
 
+define void @atomic128_nand_acquire(i128* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i128* %a, i128 0 acquire
+  ret void
+}
+; CHECK: atomic128_nand_acquire
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 2)
+
 define void @atomic128_xchg_release(i128* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i128* %a, i128 0 release
@@ -1586,6 +1762,14 @@ entry:
 ; CHECK: atomic128_xor_release
 ; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 3)
 
+define void @atomic128_nand_release(i128* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i128* %a, i128 0 release
+  ret void
+}
+; CHECK: atomic128_nand_release
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 3)
+
 define void @atomic128_xchg_acq_rel(i128* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i128* %a, i128 0 acq_rel
@@ -1634,6 +1818,14 @@ entry:
 ; CHECK: atomic128_xor_acq_rel
 ; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 4)
 
+define void @atomic128_nand_acq_rel(i128* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i128* %a, i128 0 acq_rel
+  ret void
+}
+; CHECK: atomic128_nand_acq_rel
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 4)
+
 define void @atomic128_xchg_seq_cst(i128* %a) nounwind uwtable {
 entry:
   atomicrmw xchg i128* %a, i128 0 seq_cst
@@ -1682,6 +1874,14 @@ entry:
 ; CHECK: atomic128_xor_seq_cst
 ; CHECK: call i128 @__tsan_atomic128_fetch_xor(i128* %a, i128 0, i32 5)
 
+define void @atomic128_nand_seq_cst(i128* %a) nounwind uwtable {
+entry:
+  atomicrmw nand i128* %a, i128 0 seq_cst
+  ret void
+}
+; CHECK: atomic128_nand_seq_cst
+; CHECK: call i128 @__tsan_atomic128_fetch_nand(i128* %a, i128 0, i32 5)
+
 define void @atomic128_cas_monotonic(i128* %a) nounwind uwtable {
 entry:
   cmpxchg i128* %a, i128 0, i128 1 monotonic
diff --git a/test/Integer/properties_bt.ll b/test/Integer/properties_bt.ll
index f24ddc2e80..695adf3c5f 100644
--- a/test/Integer/properties_bt.ll
+++ b/test/Integer/properties_bt.ll
@@ -5,5 +5,3 @@
 
 target datalayout = "e-p:32:32"
 target triple = "proc-vend-sys"
-deplibs = [ "m", "c" ]
-
diff --git a/test/JitListener/test-common-symbols.ll b/test/JitListener/test-common-symbols.ll
index 5f460ff686..bc94bda9a4 100644
--- a/test/JitListener/test-common-symbols.ll
+++ b/test/JitListener/test-common-symbols.ll
@@ -97,7 +97,7 @@ declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
 !17 = metadata !{i32 720948, i32 0, null, metadata !"zero_arr", metadata !"zero_arr", metadata !"", metadata !6, i32 3, metadata !18, i32 0, i32 1, [10 x i32]* @zero_arr} ; [ DW_TAG_variable ]
 !18 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 320, i64 32, i32 0, i32 0, metadata !9, metadata !19, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !19 = metadata !{metadata !20}
-!20 = metadata !{i32 720929, i64 0, i64 9}        ; [ DW_TAG_subrange_type ]
+!20 = metadata !{i32 720929, i64 0, i64 10}        ; [ DW_TAG_subrange_type ]
 !21 = metadata !{i32 7, i32 5, metadata !22, null}
 !22 = metadata !{i32 720907, metadata !5, i32 6, i32 1, metadata !6, i32 0} ; [ DW_TAG_lexical_block ]
 !23 = metadata !{i32 9, i32 5, metadata !22, null}
diff --git a/test/JitListener/test-inline.ll b/test/JitListener/test-inline.ll
index 5a4bf1f374..8bfaeafa10 100644
--- a/test/JitListener/test-inline.ll
+++ b/test/JitListener/test-inline.ll
@@ -154,7 +154,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 !24 = metadata !{i32 720909, metadata !20, metadata !"c2", metadata !6, i32 24, i64 16, i64 8, i64 8, i32 0, metadata !25} ; [ DW_TAG_member ]
 !25 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !23, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !26 = metadata !{metadata !27}
-!27 = metadata !{i32 720929, i64 0, i64 1}        ; [ DW_TAG_subrange_type ]
+!27 = metadata !{i32 720929, i64 0, i64 2}        ; [ DW_TAG_subrange_type ]
 !28 = metadata !{i32 720942, i32 0, metadata !20, metadata !"char_struct", metadata !"char_struct", metadata !"", metadata !6, i32 22, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !10} ; [ DW_TAG_subprogram ]
 !29 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !30 = metadata !{null, metadata !31}
diff --git a/test/JitListener/test-parameters.ll b/test/JitListener/test-parameters.ll
index b90a3ea356..0c437a8181 100644
--- a/test/JitListener/test-parameters.ll
+++ b/test/JitListener/test-parameters.ll
@@ -144,7 +144,7 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
 !24 = metadata !{i32 720909, metadata !20, metadata !"c2", metadata !6, i32 24, i64 16, i64 8, i64 8, i32 0, metadata !25} ; [ DW_TAG_member ]
 !25 = metadata !{i32 720897, null, metadata !"", null, i32 0, i64 16, i64 8, i32 0, i32 0, metadata !23, metadata !26, i32 0, i32 0} ; [ DW_TAG_array_type ]
 !26 = metadata !{metadata !27}
-!27 = metadata !{i32 720929, i64 0, i64 1}        ; [ DW_TAG_subrange_type ]
+!27 = metadata !{i32 720929, i64 0, i64 2}        ; [ DW_TAG_subrange_type ]
 !28 = metadata !{i32 720942, i32 0, metadata !20, metadata !"char_struct", metadata !"char_struct", metadata !"", metadata !6, i32 22, metadata !29, i1 false, i1 false, i32 0, i32 0, null, i32 320, i1 false, null, null, i32 0, metadata !10} ; [ DW_TAG_subprogram ]
 !29 = metadata !{i32 720917, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
 !30 = metadata !{null, metadata !31}
diff --git a/test/Linker/2006-01-19-ConstantPacked.ll b/test/Linker/2006-01-19-ConstantPacked.ll
index d2409e20c4..5b39cb2ad2 100644
--- a/test/Linker/2006-01-19-ConstantPacked.ll
+++ b/test/Linker/2006-01-19-ConstantPacked.ll
@@ -3,11 +3,8 @@
 
 target datalayout = "E-p:32:32"
 target triple = "powerpc-apple-darwin7.7.0"
-deplibs = [ "c", "crtend" ]
 @source = global <4 x i32> < i32 0, i32 1, i32 2, i32 3 >		; <<4 x i32>*> [#uses=0]
 
 define i32 @main() {
-entry:
-	ret i32 0
+  ret i32 0
 }
-
diff --git a/test/MC/Disassembler/ARM/hex-immediates.txt b/test/MC/Disassembler/ARM/hex-immediates.txt
new file mode 100644
index 0000000000..2634d7ed33
--- /dev/null
+++ b/test/MC/Disassembler/ARM/hex-immediates.txt
@@ -0,0 +1,5 @@
+# RUN: llvm-mc -triple=thumbv7-apple-darwin -mcpu=cortex-a8 -hdis < %s | FileCheck %s
+# CHECK: ldr	r4, [pc, #0x20]
+0x08 0x4c
+# CHECK: sub	sp, #0x84
+0xa1 0xb0
diff --git a/test/MC/Disassembler/ARM/unpredictable-BFI.txt b/test/MC/Disassembler/ARM/unpredictable-BFI.txt
new file mode 100644
index 0000000000..a98f859c4c
--- /dev/null
+++ b/test/MC/Disassembler/ARM/unpredictable-BFI.txt
@@ -0,0 +1,11 @@
+# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 2>&1 | FileCheck %s
+
+# rdar://11437956
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: 0x90 0x00 0xc0 0xe7
+0x90 0x00 0xc0 0xe7
+
+# CHECK: warning: invalid instruction encoding
+# CHECK: 0x90 0x01 0xc0 0xe7
+0x90 0x01 0xc0 0xe7
diff --git a/test/MC/Disassembler/X86/hex-immediates.txt b/test/MC/Disassembler/X86/hex-immediates.txt
new file mode 100644
index 0000000000..80d24487ee
--- /dev/null
+++ b/test/MC/Disassembler/X86/hex-immediates.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc --hdis %s -triple=x86_64-apple-darwin9 2>&1 | FileCheck %s
+
+# CHECK: movabsq	$0x7fffffffffffffff, %rcx
+0x48 0xb9 0xff 0xff 0xff 0xff 0xff 0xff 0xff 0x7f
+# CHECK: leaq	0x3e2(%rip), %rdi
+0x48 0x8d 0x3d 0xe2 0x03 0x00 0x00
+# CHECK: subq	$0x40, %rsp
+0x48 0x83 0xec 0x40
+# CHECK: leal	(,%r14,4), %eax
+0x42 0x8d 0x04 0xb5 0x00 0x00 0x00 0x00
diff --git a/test/MC/ELF/no-fixup.s b/test/MC/ELF/no-fixup.s
index 6e719bcc8c..8cc299fb24 100644
--- a/test/MC/ELF/no-fixup.s
+++ b/test/MC/ELF/no-fixup.s
@@ -4,11 +4,8 @@
 // Test that we create no fixups for this file since "a" and "b" are in the
 // same fragment.
 
-// CHECK:      assembler - Number of assembler layout and relaxation steps
-// CHECK-NEXT: assembler - Number of emitted assembler fragments
-// CHECK-NEXT: assembler - Number of emitted object file bytes
+// CHECK:      assembler - Number of emitted object file bytes
 // CHECK-NEXT: assembler - Number of fragment layouts
-// CHECK-NEXT: mcexpr    - Number of MCExpr evaluations
 
 a:
   nop
diff --git a/test/MC/ELF/relax-all-flag.s b/test/MC/ELF/relax-all-flag.s
new file mode 100644
index 0000000000..4c1c78c392
--- /dev/null
+++ b/test/MC/ELF/relax-all-flag.s
@@ -0,0 +1,19 @@
+// By default, the jmp here does not need relaxation (so the 0xeb opdoce can be
+// used).
+// However, with -mc-relax-all passed to MC, all jumps are relaxed and we
+// expect to see a different instruction.
+
+// RUN: llvm-mc -filetype=obj -mc-relax-all -triple x86_64-pc-linux-gnu %s -o - \
+// RUN:  | llvm-objdump -disassemble - | FileCheck -check-prefix=RELAXALL %s
+
+// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - \
+// RUN:  | llvm-objdump -disassemble - | FileCheck %s
+
+.text
+foo:
+  mov %rax, %rax
+  jmp foo
+
+// RELAXALL:    3:  e9
+// CHECK:       3:  eb
+
diff --git a/test/MC/MachO/ARM/lit.local.cfg b/test/MC/MachO/ARM/lit.local.cfg
index 89764637fe..9f0d39d921 100644
--- a/test/MC/MachO/ARM/lit.local.cfg
+++ b/test/MC/MachO/ARM/lit.local.cfg
@@ -1,4 +1,4 @@
-config.suffixes = ['.s']
+config.suffixes = ['.s', '.ll']
 
 targets = set(config.root.targets_to_build.split())
 if not 'ARM' in targets:
diff --git a/test/MC/MachO/gen-dwarf.s b/test/MC/MachO/gen-dwarf.s
index 4fbc32d295..cf2d1db133 100644
--- a/test/MC/MachO/gen-dwarf.s
+++ b/test/MC/MachO/gen-dwarf.s
@@ -86,7 +86,7 @@ _x:	.long 1
 // CHECK: .debug_aranges contents:
 // CHECK: Address Range Header: length = 0x0000001c, version = 0x0002, cu_offset = 0x00000000, addr_size = 0x04, seg_size = 0x00
 
-// CHECK: .debug_lines contents:
+// CHECK: .debug_line contents:
 // CHECK: Line table prologue:
 // We don't check the total_length as it includes lengths of temp paths
 // CHECK:         version: 2
diff --git a/test/MC/PowerPC/ppc64-initial-cfa.ll b/test/MC/PowerPC/ppc64-initial-cfa.ll
new file mode 100644
index 0000000000..3936cf2e81
--- /dev/null
+++ b/test/MC/PowerPC/ppc64-initial-cfa.ll
@@ -0,0 +1,41 @@
+;; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -filetype=obj %s -o - | \
+;; RUN: elf-dump --dump-section-data | FileCheck %s
+
+;; FIXME: this file should be in .s form, change when asm parser is available.
+
+define void @f() {
+entry:
+  ret void
+}
+
+;; CHECK:      ('sh_name', 0x{{.*}}) # '.eh_frame'
+;; CHECK-NEXT: ('sh_type', 0x00000001)
+;; CHECK-NEXT: ('sh_flags', 0x0000000000000002)
+;; CHECK-NEXT: ('sh_addr', 0x{{.*}})
+;; CHECK-NEXT: ('sh_offset', 0x{{.*}})
+;; CHECK-NEXT: ('sh_size', 0x0000000000000030)
+;; CHECK-NEXT: ('sh_link', 0x00000000)
+;; CHECK-NEXT: ('sh_info', 0x00000000)
+;; CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+;; CHECK-NEXT: ('sh_entsize', 0x0000000000000000)
+;; CHECK-NEXT: ('_section_data', '00000010 00000000 017a5200 01784101 000c0100 00000018 00000018 00000000 00000000 00000000 00000010 00000000')
+
+;; CHECK:      ('sh_name', 0x{{.*}}) # '.rela.eh_frame'
+;; CHECK-NEXT: ('sh_type', 0x00000004)
+;; CHECK-NEXT: ('sh_flags', 0x0000000000000000)
+;; CHECK-NEXT: ('sh_addr', 0x{{.*}})
+;; CHECK-NEXT: ('sh_offset', 0x{{.*}})
+;; CHECK-NEXT: ('sh_size', 0x0000000000000018)
+;; CHECK-NEXT: ('sh_link', 0x{{.*}})
+;; CHECK-NEXT: ('sh_info', 0x{{.*}})
+;; CHECK-NEXT: ('sh_addralign', 0x0000000000000008)
+;; CHECK-NEXT: ('sh_entsize', 0x0000000000000018)
+;; CHECK-NEXT: ('_relocations', [
+;; CHECK-NEXT:  # Relocation 0
+;; CHECK-NEXT:  (('r_offset', 0x000000000000001c)
+;; CHECK-NEXT:   ('r_sym', 0x{{.*}})
+;; CHECK-NEXT:   ('r_type', 0x00000026)
+;; CHECK-NEXT:   ('r_addend', 0x0000000000000000)
+;; CHECK-NEXT:  ),
+;; CHECK-NEXT: ])
+
diff --git a/test/Makefile b/test/Makefile
index 810fdded46..4e690cc325 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -78,8 +78,10 @@ else # !SunOS
 ifeq ($(HOST_OS),AuroraUX)
 ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -v 512000 ;
 else # !AuroraUX
-# Fedora 13 x86-64 python fails with -v 76800
-ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ; ulimit -v 1024000 ;
+# Newer versions of python try to allocate an insane amount of address space for
+# its thread-local storage, don't set a limit here.
+# FIXME: Those limits should be enforced by lit instead of globally.
+ULIMIT=ulimit -t 600 ; ulimit -d 512000 ; ulimit -m 512000 ;
 endif # AuroraUX
 endif # SunOS
 
diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll
index d9a12eebed..3527ae75b4 100644
--- a/test/Transforms/BBVectorize/simple.ll
+++ b/test/Transforms/BBVectorize/simple.ll
@@ -173,3 +173,27 @@ define double @test7(double %A1, double %A2, double %B1, double %B2) {
 ; CHECK: ret double %R
 }
 
+; Basic depth-3 chain (subclass data)
+define i64 @test8(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
+; CHECK: @test8
+; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
+	%X1 = sub nsw i64 %A1, %B1
+	%X2 = sub i64 %A2, %B2
+; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
+	%Y1 = mul i64 %X1, %A1
+	%Y2 = mul i64 %X2, %A2
+; CHECK: %Y1 = mul <2 x i64> %X1, %X1.v.i0.2
+	%Z1 = add i64 %Y1, %B1
+	%Z2 = add i64 %Y2, %B2
+; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
+	%R  = mul i64 %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
+; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
+	ret i64 %R
+; CHECK: ret i64 %R
+}
+
diff --git a/test/Transforms/GVN/crash-no-aa.ll b/test/Transforms/GVN/crash-no-aa.ll
index dae65ddb2f..c87a9c6576 100644
--- a/test/Transforms/GVN/crash-no-aa.ll
+++ b/test/Transforms/GVN/crash-no-aa.ll
@@ -1,7 +1,6 @@
 ; RUN: opt -no-aa -gvn -S %s
 
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v1
-28:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-unknown-freebsd8.0"
 
 ; PR5744
diff --git a/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
index 953bbdff5c..5dca712646 100644
--- a/test/Transforms/IndVarSimplify/eliminate-comparison.ll
+++ b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
@@ -106,3 +106,106 @@ loop:
 return:
   ret void
 }
+
+; PR14432
+; Indvars should not turn the second loop into an infinite one.
+
+; CHECK: @func_11
+; CHECK: %tmp5 = icmp slt i32 %__key6.0, 10
+; CHECK-NOT: br i1 true, label %noassert68, label %unrolledend
+
+define i32 @func_11() nounwind uwtable {
+entry:
+  br label %forcond
+
+forcond:                                          ; preds = %noassert, %entry
+  %__key6.0 = phi i32 [ 2, %entry ], [ %tmp37, %noassert ]
+  %tmp5 = icmp slt i32 %__key6.0, 10
+  br i1 %tmp5, label %noassert, label %forcond38.preheader
+
+forcond38.preheader:                              ; preds = %forcond
+  br label %forcond38
+
+noassert:                                         ; preds = %forbody
+  %tmp13 = sdiv i32 -32768, %__key6.0
+  %tmp2936 = shl i32 %tmp13, 24
+  %sext23 = shl i32 %tmp13, 24
+  %tmp32 = icmp eq i32 %tmp2936, %sext23
+  %tmp37 = add i32 %__key6.0, 1
+  br i1 %tmp32, label %forcond, label %assert33
+
+assert33:                                         ; preds = %noassert
+  tail call void @llvm.trap()
+  unreachable
+
+forcond38:                                        ; preds = %noassert68, %forcond38.preheader
+  %__key8.0 = phi i32 [ %tmp81, %noassert68 ], [ 2, %forcond38.preheader ]
+  %tmp46 = icmp slt i32 %__key8.0, 10
+  br i1 %tmp46, label %noassert68, label %unrolledend
+
+noassert68:                                       ; preds = %forbody39
+  %tmp57 = sdiv i32 -32768, %__key8.0
+  %sext34 = shl i32 %tmp57, 16
+  %sext21 = shl i32 %tmp57, 16
+  %tmp76 = icmp eq i32 %sext34, %sext21
+  %tmp81 = add i32 %__key8.0, 1
+  br i1 %tmp76, label %forcond38, label %assert77
+
+assert77:                                         ; preds = %noassert68
+  tail call void @llvm.trap()
+  unreachable
+
+unrolledend:                                      ; preds = %forcond38
+  ret i32 0
+}
+
+declare void @llvm.trap() noreturn nounwind
+
+; In this case the second loop only has a single iteration, fold the header away
+; CHECK: @func_12
+; CHECK: %tmp5 = icmp slt i32 %__key6.0, 10
+; CHECK: br i1 true, label %noassert68, label %unrolledend
+define i32 @func_12() nounwind uwtable {
+entry:
+  br label %forcond
+
+forcond:                                          ; preds = %noassert, %entry
+  %__key6.0 = phi i32 [ 2, %entry ], [ %tmp37, %noassert ]
+  %tmp5 = icmp slt i32 %__key6.0, 10
+  br i1 %tmp5, label %noassert, label %forcond38.preheader
+
+forcond38.preheader:                              ; preds = %forcond
+  br label %forcond38
+
+noassert:                                         ; preds = %forbody
+  %tmp13 = sdiv i32 -32768, %__key6.0
+  %tmp2936 = shl i32 %tmp13, 24
+  %sext23 = shl i32 %tmp13, 24
+  %tmp32 = icmp eq i32 %tmp2936, %sext23
+  %tmp37 = add i32 %__key6.0, 1
+  br i1 %tmp32, label %forcond, label %assert33
+
+assert33:                                         ; preds = %noassert
+  tail call void @llvm.trap()
+  unreachable
+
+forcond38:                                        ; preds = %noassert68, %forcond38.preheader
+  %__key8.0 = phi i32 [ %tmp81, %noassert68 ], [ 2, %forcond38.preheader ]
+  %tmp46 = icmp slt i32 %__key8.0, 10
+  br i1 %tmp46, label %noassert68, label %unrolledend
+
+noassert68:                                       ; preds = %forbody39
+  %tmp57 = sdiv i32 -32768, %__key8.0
+  %sext34 = shl i32 %tmp57, 16
+  %sext21 = shl i32 %tmp57, 16
+  %tmp76 = icmp ne i32 %sext34, %sext21
+  %tmp81 = add i32 %__key8.0, 1
+  br i1 %tmp76, label %forcond38, label %assert77
+
+assert77:                                         ; preds = %noassert68
+  tail call void @llvm.trap()
+  unreachable
+
+unrolledend:                                      ; preds = %forcond38
+  ret i32 0
+}
diff --git a/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll b/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
index b4380d01e4..ee5a378b18 100644
--- a/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
+++ b/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
@@ -2,7 +2,6 @@
 ; PR993
 target datalayout = "e-p:32:32"
 target triple = "i386-unknown-openbsd3.9"
-deplibs = [ "stdc++", "c", "crtend" ]
 	%"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" = type { i8* }
 	%"struct.__gnu_cxx::char_producer<char>" = type { i32 (...)** }
 	%struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
diff --git a/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll b/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
index b754d9f9f5..fb5a4b512b 100644
--- a/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
+++ b/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
@@ -2,7 +2,6 @@
 ; PR992
 target datalayout = "e-p:32:32"
 target triple = "i686-pc-linux-gnu"
-deplibs = [ "stdc++", "c", "crtend" ]
 	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
 	%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
 	%"struct.__cxxabiv1::__array_type_info" = type { %"struct.std::type_info" }
diff --git a/test/Transforms/SimplifyLibCalls/2010-05-30-memcpy-Struct.ll b/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
index f67bae74f5..b75fa5ad40 100644
--- a/test/Transforms/SimplifyLibCalls/2010-05-30-memcpy-Struct.ll
+++ b/test/Transforms/InstCombine/2010-05-30-memcpy-Struct.ll
@@ -1,4 +1,4 @@
-; RUN: opt -simplify-libcalls %s -S -o - | FileCheck %s
+; RUN: opt -instcombine %s -S -o - | FileCheck %s
 ; PR7265
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
@@ -6,11 +6,11 @@ target triple = "x86_64-unknown-linux-gnu"
 
 %union.anon = type { i32, [4 x i8] }
 
-@.str = private constant [3 x i8] c"%s\00"        ; <[3 x i8]*> [#uses=2]
+@.str = private constant [3 x i8] c"%s\00"
 
 define void @CopyEventArg(%union.anon* %ev) nounwind {
 entry:
-  %call = call i32 (i8*, i8*, ...)* @sprintf(i8* undef, i8* getelementptr inbounds ([3 x i8]* @.str, i64 0, i64 0), %union.anon* %ev) nounwind ; <i32> [#uses=0]
+  %call = call i32 (i8*, i8*, ...)* @sprintf(i8* undef, i8* getelementptr inbounds ([3 x i8]* @.str, i64 0, i64 0), %union.anon* %ev) nounwind
 ; CHECK: bitcast %union.anon* %ev to i8*
 ; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
   ret void
diff --git a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
index eb28994756..8001621979 100644
--- a/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
+++ b/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
@@ -5,8 +5,8 @@
 define i32 @main(i32 %argc) nounwind ssp {
 entry:
   %tmp3151 = trunc i32 %argc to i8
-; CHECK: %tmp3163 = shl i8 %tmp3162, 6
-; CHECK: and i8 %tmp3163, 64
+; CHECK: %0 = shl i8 %tmp3151, 5
+; CHECK: and i8 %0, 64
 ; CHECK-NOT: shl
 ; CHECK-NOT: shr
   %tmp3161 = or i8 %tmp3151, -17
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
new file mode 100644
index 0000000000..b6a15677bb
--- /dev/null
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; testing-case "float fold(float a) { return 1.2f * a * 2.3f; }"
+; 1.2f and 2.3f is supposed to be fold.
+define float @fold(float %a) {
+fold:
+  %mul = fmul fast float %a, 0x3FF3333340000000
+  %mul1 = fmul fast float %mul, 0x4002666660000000
+  ret float %mul1
+; CHECK: fold
+; CHECK: fmul float %a, 0x4006147AE0000000
+}
+
+; Same testing-case as the one used in fold() except that the operators have
+; fixed FP mode.
+define float @notfold(float %a) {
+notfold:
+; CHECK: notfold
+; CHECK: %mul = fmul fast float %a, 0x3FF3333340000000
+  %mul = fmul fast float %a, 0x3FF3333340000000
+  %mul1 = fmul float %mul, 0x4002666660000000
+  ret float %mul1
+}
+
+define float @fold2(float %a) {
+fold2:
+; CHECK: fold2
+; CHECK: fmul float %a, 0x4006147AE0000000
+  %mul = fmul float %a, 0x3FF3333340000000
+  %mul1 = fmul fast float %mul, 0x4002666660000000
+  ret float %mul1
+}
diff --git a/test/Transforms/InstCombine/fprintf-1.ll b/test/Transforms/InstCombine/fprintf-1.ll
new file mode 100644
index 0000000000..39d86b4588
--- /dev/null
+++ b/test/Transforms/InstCombine/fprintf-1.ll
@@ -0,0 +1,80 @@
+; Test that the fprintf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+%FILE = type { }
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@percent_c = constant [3 x i8] c"%c\00"
+@percent_d = constant [3 x i8] c"%d\00"
+@percent_f = constant [3 x i8] c"%f\00"
+@percent_s = constant [3 x i8] c"%s\00"
+
+declare i32 @fprintf(%FILE*, i8*, ...)
+
+; Check fprintf(fp, "foo") -> fwrite("foo", 3, 1, fp).
+
+define void @test_simplify1(%FILE* %fp) {
+; CHECK: @test_simplify1
+  %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt)
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check fprintf(fp, "%c", chr) -> fputc(chr, fp).
+
+define void @test_simplify2(%FILE* %fp) {
+; CHECK: @test_simplify2
+  %fmt = getelementptr [3 x i8]* @percent_c, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i8 104)
+; CHECK-NEXT: call i32 @fputc(i32 104, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check fprintf(fp, "%s", str) -> fputs(str, fp).
+; NOTE: The fputs simplifier simplifies this further to fwrite.
+
+define void @test_simplify3(%FILE* %fp) {
+; CHECK: @test_simplify3
+  %fmt = getelementptr [3 x i8]* @percent_s, i32 0, i32 0
+  %str = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 12, i32 1, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check fprintf(fp, fmt, ...) -> fiprintf(fp, fmt, ...) if no floating point.
+
+define void @test_simplify4(%FILE* %fp) {
+; CHECK-IPRINTF: @test_simplify4
+  %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, i32 187)
+; CHECK-NEXT-IPRINTF: call i32 (%FILE*, i8*, ...)* @fiprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+  ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify1(%FILE* %fp) {
+; CHECK-IPRINTF: @test_no_simplify1
+  %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, double 1.87)
+; CHECK-NEXT-IPRINTF: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+  ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify2(%FILE* %fp, double %d) {
+; CHECK: @test_no_simplify2
+  %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
+  call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (%FILE*, i8*, ...)* @fprintf(%FILE* %fp, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double %d)
+  ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/fputs-1.ll b/test/Transforms/InstCombine/fputs-1.ll
new file mode 100644
index 0000000000..c7c5becfd0
--- /dev/null
+++ b/test/Transforms/InstCombine/fputs-1.ll
@@ -0,0 +1,43 @@
+; Test that the fputs library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+%FILE = type { }
+
+@empty = constant [1 x i8] zeroinitializer
+@A = constant [2 x i8] c"A\00"
+@hello = constant [7 x i8] c"hello\0A\00"
+
+declare i32 @fputs(i8*, %FILE*)
+
+; Check fputs(str, fp) --> fwrite(str, 1, strlen(s), fp).
+
+define void @test_simplify1(%FILE* %fp) {
+; CHECK: @test_simplify1
+  %str = getelementptr [1 x i8]* @empty, i32 0, i32 0
+  call i32 @fputs(i8* %str, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; NOTE: The fwrite simplifier simplifies this further to fputc.
+
+define void @test_simplify2(%FILE* %fp) {
+; CHECK: @test_simplify2
+  %str = getelementptr [2 x i8]* @A, i32 0, i32 0
+  call i32 @fputs(i8* %str, %FILE* %fp)
+; CHECK-NEXT: call i32 @fputc(i32 65, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3(%FILE* %fp) {
+; CHECK: @test_simplify3
+  %str = getelementptr [7 x i8]* @hello, i32 0, i32 0
+  call i32 @fputs(i8* %str, %FILE* %fp)
+; CHECK-NEXT: call i32 @fwrite(i8* getelementptr inbounds ([7 x i8]* @hello, i32 0, i32 0), i32 6, i32 1, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/fwrite-1.ll b/test/Transforms/InstCombine/fwrite-1.ll
new file mode 100644
index 0000000000..528cdec217
--- /dev/null
+++ b/test/Transforms/InstCombine/fwrite-1.ll
@@ -0,0 +1,57 @@
+; Test that the fwrite library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+%FILE = type { }
+
+@str = constant [1 x i8] zeroinitializer
+@empty = constant [0 x i8] zeroinitializer
+
+declare i64 @fwrite(i8*, i64, i64, %FILE *)
+
+; Check fwrite(S, 1, 1, fp) -> fputc(S[0], fp).
+
+define void @test_simplify1(%FILE* %fp) {
+; CHECK: @test_simplify1
+  %str = getelementptr inbounds [1 x i8]* @str, i64 0, i64 0
+  call i64 @fwrite(i8* %str, i64 1, i64 1, %FILE* %fp)
+; CHECK-NEXT: call i32 @fputc(i32 0, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify2(%FILE* %fp) {
+; CHECK: @test_simplify2
+  %str = getelementptr inbounds [0 x i8]* @empty, i64 0, i64 0
+  call i64 @fwrite(i8* %str, i64 1, i64 0, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3(%FILE* %fp) {
+; CHECK: @test_simplify3
+  %str = getelementptr inbounds [0 x i8]* @empty, i64 0, i64 0
+  call i64 @fwrite(i8* %str, i64 0, i64 1, %FILE* %fp)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define i64 @test_no_simplify1(%FILE* %fp) {
+; CHECK: @test_no_simplify1
+  %str = getelementptr inbounds [1 x i8]* @str, i64 0, i64 0
+  %ret = call i64 @fwrite(i8* %str, i64 1, i64 1, %FILE* %fp)
+; CHECK-NEXT: call i64 @fwrite
+  ret i64 %ret
+; CHECK-NEXT: ret i64 %ret
+}
+
+define void @test_no_simplify2(%FILE* %fp, i64 %size) {
+; CHECK: @test_no_simplify2
+  %str = getelementptr inbounds [1 x i8]* @str, i64 0, i64 0
+  call i64 @fwrite(i8* %str, i64 %size, i64 1, %FILE* %fp)
+; CHECK-NEXT: call i64 @fwrite
+  ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 6c8e634763..bbc70fe333 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -65,7 +65,7 @@ define i32 @test9(i32 %i) {
 ; CHECK: @test9
         %j = mul i32 %i, -1             ; <i32> [#uses=1]
         ret i32 %j
-; CHECJ: sub i32 0, %i
+; CHECK: sub i32 0, %i
 }
 
 define i32 @test10(i32 %a, i32 %b) {
diff --git a/test/Transforms/SimplifyLibCalls/osx-names.ll b/test/Transforms/InstCombine/osx-names.ll
index e321d1dd31..7b83526ace 100644
--- a/test/Transforms/SimplifyLibCalls/osx-names.ll
+++ b/test/Transforms/InstCombine/osx-names.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck %s
 ; <rdar://problem/9815881>
 ; On OSX x86-32, fwrite and fputs aren't called fwrite and fputs.
 ; Make sure we use the correct names.
diff --git a/test/Transforms/InstCombine/printf-2.ll b/test/Transforms/InstCombine/printf-2.ll
new file mode 100644
index 0000000000..466ee1c757
--- /dev/null
+++ b/test/Transforms/InstCombine/printf-2.ll
@@ -0,0 +1,41 @@
+; Test that the printf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@h = constant [2 x i8] c"h\00"
+@percent_s = constant [4 x i8] c"%s\0A\00"
+
+declare void @printf(i8*, ...)
+
+; Check simplification of printf with void return type.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+  %fmt = getelementptr [2 x i8]* @h, i32 0, i32 0
+  call void (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 104)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+  %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  call void (i8*, ...)* @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @str, i32 0, i32 0))
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify6() {
+; CHECK: @test_simplify6
+  %fmt = getelementptr [4 x i8]* @percent_s, i32 0, i32 0
+  %str = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  call void (i8*, ...)* @printf(i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0))
+  ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/puts-1.ll b/test/Transforms/InstCombine/puts-1.ll
new file mode 100644
index 0000000000..ef4e1bbd82
--- /dev/null
+++ b/test/Transforms/InstCombine/puts-1.ll
@@ -0,0 +1,31 @@
+; Test that the puts library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@empty = constant [1 x i8] zeroinitializer
+
+declare i32 @puts(i8*)
+
+; Check puts("") -> putchar('\n').
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+  %str = getelementptr [1 x i8]* @empty, i32 0, i32 0
+  call i32 @puts(i8* %str)
+; CHECK-NEXT: call i32 @putchar(i32 10)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Don't simplify if the return value is used.
+
+define i32 @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+  %str = getelementptr [1 x i8]* @empty, i32 0, i32 0
+  %ret = call i32 @puts(i8* %str)
+; CHECK-NEXT: call i32 @puts(i8* getelementptr inbounds ([1 x i8]* @empty, i32 0, i32 0))
+  ret i32 %ret
+; CHECK-NEXT: ret i32 %ret
+}
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 25e708b7f5..32867761a3 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -523,9 +523,9 @@ entry:
   %tmp51 = xor i8 %tmp50, %tmp5
   %tmp52 = and i8 %tmp51, -128
   %tmp53 = lshr i8 %tmp52, 7
-; CHECK: lshr i8 %tmp51, 7
   %tmp54 = mul i8 %tmp53, 16
-; CHECK: shl nuw nsw i8 %tmp53, 4
+; CHECK: %0 = shl i8 %tmp4, 2
+; CHECK: %tmp54 = and i8 %0, 16
   %tmp55 = xor i8 %tmp54, %tmp51
 ; CHECK: ret i8 %tmp551
   ret i8 %tmp55
@@ -659,3 +659,79 @@ define i32 @test53(i32 %x) {
 ; CHECK-NEXT: %B = shl nuw i32 %x, 2
 ; CHECK-NEXT: ret i32 %B
 }
+
+define i32 @test54(i32 %x) {
+  %shr2 = lshr i32 %x, 1
+  %shl = shl i32 %shr2, 4
+  %and = and i32 %shl, 16
+  ret i32 %and
+; CHECK: @test54
+; CHECK: shl i32 %x, 3
+}
+
+
+define i32 @test55(i32 %x) {
+  %shr2 = lshr i32 %x, 1
+  %shl = shl i32 %shr2, 4
+  %or = or i32 %shl, 8
+  ret i32 %or
+; CHECK: @test55
+; CHECK: shl i32 %x, 3
+}
+
+define i32 @test56(i32 %x) {
+  %shr2 = lshr i32 %x, 1
+  %shl = shl i32 %shr2, 4
+  %or = or i32 %shl, 7
+  ret i32 %or
+; CHECK: @test56
+; CHECK: shl i32 %shr2, 4
+}
+
+
+define i32 @test57(i32 %x) {
+  %shr = lshr i32 %x, 1
+  %shl = shl i32 %shr, 4
+  %and = and i32 %shl, 16
+  ret i32 %and
+; CHECK: @test57
+; CHECK: shl i32 %x, 3
+}
+
+define i32 @test58(i32 %x) {
+  %shr = lshr i32 %x, 1
+  %shl = shl i32 %shr, 4
+  %or = or i32 %shl, 8
+  ret i32 %or
+; CHECK: @test58
+; CHECK: shl i32 %x, 3
+}
+
+define i32 @test59(i32 %x) {
+  %shr = ashr i32 %x, 1
+  %shl = shl i32 %shr, 4
+  %or = or i32 %shl, 7
+  ret i32 %or
+; CHECK: @test59
+; CHECK: %shl = shl i32 %shr1, 4
+}
+
+
+define i32 @test60(i32 %x) {
+  %shr = ashr i32 %x, 4
+  %shl = shl i32 %shr, 1
+  %or = or i32 %shl, 1
+  ret i32 %or
+; CHECK: @test60
+; CHECK: ashr i32 %x, 3
+}
+
+
+define i32 @test61(i32 %x) {
+  %shr = ashr i32 %x, 4
+  %shl = shl i32 %shr, 1
+  %or = or i32 %shl, 2
+  ret i32 %or
+; CHECK: @test61
+; CHECK: ashr i32 %x, 4
+}
diff --git a/test/Transforms/InstCombine/signext.ll b/test/Transforms/InstCombine/signext.ll
index ecee9830cd..5ed1cd5590 100644
--- a/test/Transforms/InstCombine/signext.ll
+++ b/test/Transforms/InstCombine/signext.ll
@@ -82,6 +82,6 @@ entry:
   %sub = add i32 %xor, -67108864                  ; <i32> [#uses=1]
   ret i32 %sub
 ; CHECK: @test8
-; CHECK: %shr = ashr i32 %x, 5
-; CHECK: ret i32 %shr
+; CHECK: %sub = ashr i32 %x, 5
+; CHECK: ret i32 %sub
 }
diff --git a/test/Transforms/InstCombine/sprintf-1.ll b/test/Transforms/InstCombine/sprintf-1.ll
new file mode 100644
index 0000000000..9b8c8b1b12
--- /dev/null
+++ b/test/Transforms/InstCombine/sprintf-1.ll
@@ -0,0 +1,100 @@
+; Test that the sprintf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple xcore-xmos-elf -instcombine -S | FileCheck %s -check-prefix=IPRINTF
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello_world = constant [13 x i8] c"hello world\0A\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+@h = constant [2 x i8] c"h\00"
+@percent_c = constant [3 x i8] c"%c\00"
+@percent_d = constant [3 x i8] c"%d\00"
+@percent_f = constant [3 x i8] c"%f\00"
+@percent_s = constant [3 x i8] c"%s\00"
+
+declare i32 @sprintf(i8*, i8*, ...)
+
+; Check sprintf(dst, fmt) -> llvm.memcpy(str, fmt, strlen(fmt) + 1, 1).
+
+define void @test_simplify1(i8* %dst) {
+; CHECK: @test_simplify1
+  %fmt = getelementptr [13 x i8]* @hello_world, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* getelementptr inbounds ([13 x i8]* @hello_world, i32 0, i32 0), i32 13, i32 1, i1 false)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify2(i8* %dst) {
+; CHECK: @test_simplify2
+  %fmt = getelementptr [1 x i8]* @null, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
+; CHECK-NEXT: store i8 0, i8* %dst, align 1
+  ret void
+; CHECK-NEXT: ret void
+}
+
+define void @test_simplify3(i8* %dst) {
+; CHECK: @test_simplify3
+  %fmt = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt)
+; CHECK-NEXT: store i8 0, i8* %dst, align 1
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check sprintf(dst, "%c", chr) -> *(i8*)dst = chr; *((i8*)dst + 1) = 0.
+
+define void @test_simplify4(i8* %dst) {
+; CHECK: @test_simplify4
+  %fmt = getelementptr [3 x i8]* @percent_c, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i8 104)
+; CHECK-NEXT: store i8 104, i8* %dst, align 1
+; CHECK-NEXT: [[NUL:%[a-z0-9]+]] = getelementptr i8* %dst, i32 1
+; CHECK-NEXT: store i8 0, i8* [[NUL]], align 1
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check sprintf(dst, "%s", str) -> llvm.memcpy(dest, str, strlen(str) + 1, 1).
+
+define void @test_simplify5(i8* %dst, i8* %str) {
+; CHECK: @test_simplify5
+  %fmt = getelementptr [3 x i8]* @percent_s, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i8* %str)
+; CHECK-NEXT: [[STRLEN:%[a-z0-9]+]] = call i32 @strlen(i8* %str)
+; CHECK-NEXT: [[LENINC:%[a-z0-9]+]] = add i32 [[STRLEN]], 1
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %str, i32 [[LENINC]], i32 1, i1 false)
+  ret void
+; CHECK-NEXT: ret void
+}
+
+; Check sprintf(dst, format, ...) -> siprintf(str, format, ...) if no floating.
+
+define void @test_simplify6(i8* %dst) {
+; CHECK-IPRINTF: @test_simplify6
+  %fmt = getelementptr [3 x i8]* @percent_d, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, i32 187)
+; CHECK-NEXT-IPRINTF: call i32 (i8*, i8*, ...)* @siprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_d, i32 0, i32 0), i32 187)
+  ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify1(i8* %dst) {
+; CHECK-IPRINTF: @test_no_simplify1
+  %fmt = getelementptr [3 x i8]* @percent_f, i32 0, i32 0
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double 1.87)
+; CHECK-NEXT-IPRINTF: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* getelementptr inbounds ([3 x i8]* @percent_f, i32 0, i32 0), double 1.870000e+00)
+  ret void
+; CHECK-NEXT-IPRINTF: ret void
+}
+
+define void @test_no_simplify2(i8* %dst, i8* %fmt, double %d) {
+; CHECK: @test_no_simplify2
+  call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double %d)
+; CHECK-NEXT: call i32 (i8*, i8*, ...)* @sprintf(i8* %dst, i8* %fmt, double %d)
+  ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/xor2.ll b/test/Transforms/InstCombine/xor2.ll
index 3c99246796..be06d7999d 100644
--- a/test/Transforms/InstCombine/xor2.ll
+++ b/test/Transforms/InstCombine/xor2.ll
@@ -66,3 +66,19 @@ test5:
 ; CHECK: lshr i32 %val1, 8
 ; CHECK: ret
 }
+
+; defect-1 in rdar://12329730
+; Simplify (X^Y) -> X or Y in the user's context if we know that 
+; only bits from X or Y are demanded.
+; e.g. the "x ^ 1234" can be optimized into x in the context of "t >> 16".
+;  Put in other word, t >> 16 -> x >> 16.
+; unsigned foo(unsigned x) { unsigned t = x ^ 1234; ;  return (t >> 16) + t;}
+define i32 @test6(i32 %x) {
+  %xor = xor i32 %x, 1234
+  %shr = lshr i32 %xor, 16
+  %add = add i32 %shr, %xor
+  ret i32 %add
+; CHECK: @test6
+; CHECK: lshr i32 %x, 16
+; CHECK: ret
+}
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index ce2bb799c8..56627b99a4 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -165,6 +165,46 @@ entry:
   ret i1 %cmp
 }
 
+define i1 @gep13(i8* %ptr) {
+; CHECK: @gep13
+; We can prove this GEP is non-null because it is inbounds.
+  %x = getelementptr inbounds i8* %ptr, i32 1
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep14({ {}, i8 }* %ptr) {
+; CHECK: @gep14
+; We can't simplify this because the offset of one in the GEP actually doesn't
+; move the pointer.
+  %x = getelementptr inbounds { {}, i8 }* %ptr, i32 0, i32 1
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NOT: ret i1 false
+}
+
+define i1 @gep15({ {}, [4 x {i8, i8}]}* %ptr, i32 %y) {
+; CHECK: @gep15
+; We can prove this GEP is non-null even though there is a user value, as we
+; would necessarily violate inbounds on one side or the other.
+  %x = getelementptr inbounds { {}, [4 x {i8, i8}]}* %ptr, i32 0, i32 1, i32 %y, i32 1
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @gep16(i8* %ptr, i32 %a) {
+; CHECK: @gep16
+; We can prove this GEP is non-null because it is inbounds and because we know
+; %b is non-zero even though we don't know its value.
+  %b = or i32 %a, 1
+  %x = getelementptr inbounds i8* %ptr, i32 %b
+  %cmp = icmp eq i8* %x, null
+  ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
 define i1 @zext(i32 %x) {
 ; CHECK: @zext
   %e1 = zext i32 %x to i64
diff --git a/test/Transforms/InstSimplify/fast-math.ll b/test/Transforms/InstSimplify/fast-math.ll
new file mode 100644
index 0000000000..e4b3ea306a
--- /dev/null
+++ b/test/Transforms/InstSimplify/fast-math.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+;; x * 0 ==> 0 when no-nans and no-signed-zero
+; CHECK: mul_zero_1
+define float @mul_zero_1(float %a) {
+  %b = fmul nsz nnan float %a, 0.0
+; CHECK: ret float 0.0
+  ret float %b
+}
+; CHECK: mul_zero_2
+define float @mul_zero_2(float %a) {
+  %b = fmul fast float 0.0, %a
+; CHECK: ret float 0.0
+  ret float %b
+}
+
+;; x * 0 =/=> 0 when there could be nans or -0
+; CHECK: no_mul_zero_1
+define float @no_mul_zero_1(float %a) {
+  %b = fmul nsz float %a, 0.0
+; CHECK: ret float %b
+  ret float %b
+}
+; CHECK: no_mul_zero_2
+define float @no_mul_zero_2(float %a) {
+  %b = fmul nnan float %a, 0.0
+; CHECK: ret float %b
+  ret float %b
+}
+; CHECK: no_mul_zero_3
+define float @no_mul_zero_3(float %a) {
+  %b = fmul float %a, 0.0
+; CHECK: ret float %b
+  ret float %b
+}
diff --git a/test/Transforms/LoopIdiom/X86/lit.local.cfg b/test/Transforms/LoopIdiom/X86/lit.local.cfg
new file mode 100644
index 0000000000..a8ad0f1a28
--- /dev/null
+++ b/test/Transforms/LoopIdiom/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+    config.unsupported = True
+
diff --git a/test/Transforms/LoopIdiom/X86/popcnt.ll b/test/Transforms/LoopIdiom/X86/popcnt.ll
new file mode 100644
index 0000000000..2f458fb2f1
--- /dev/null
+++ b/test/Transforms/LoopIdiom/X86/popcnt.ll
@@ -0,0 +1,120 @@
+; RUN: opt -loop-idiom < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -S | FileCheck %s
+
+;To recognize this pattern:
+;int popcount(unsigned long long a) {
+;    int c = 0;
+;    while (a) {
+;        c++;
+;        a &= a - 1;
+;    }
+;    return c;
+;}
+; 
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount(i64 %a) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i64 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i64 %a.addr.04, -1
+  %and = and i64 %sub, %a.addr.04
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
+
+; To recognize this pattern:
+;int popcount(unsigned long long a, int mydata1, int mydata2) {
+;    int c = 0;
+;    while (a) {
+;        c++;
+;        a &= a - 1;
+;        mydata1 *= c;
+;        mydata2 *= (int)a;
+;    }
+;    return c + mydata1 + mydata2;
+;}
+; CHECK: entry
+; CHECK: llvm.ctpop.i64
+; CHECK: ret
+define i32 @popcount2(i64 %a, i32 %mydata1, i32 %mydata2) nounwind uwtable readnone ssp {
+entry:
+  %tobool9 = icmp eq i64 %a, 0
+  br i1 %tobool9, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.013 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %mydata2.addr.012 = phi i32 [ %mul1, %while.body ], [ %mydata2, %entry ]
+  %mydata1.addr.011 = phi i32 [ %mul, %while.body ], [ %mydata1, %entry ]
+  %a.addr.010 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.013, 1
+  %sub = add i64 %a.addr.010, -1
+  %and = and i64 %sub, %a.addr.010
+  %mul = mul nsw i32 %inc, %mydata1.addr.011
+  %conv = trunc i64 %and to i32
+  %mul1 = mul nsw i32 %conv, %mydata2.addr.012
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  %mydata2.addr.0.lcssa = phi i32 [ %mydata2, %entry ], [ %mul1, %while.body ]
+  %mydata1.addr.0.lcssa = phi i32 [ %mydata1, %entry ], [ %mul, %while.body ]
+  %add = add i32 %mydata2.addr.0.lcssa, %mydata1.addr.0.lcssa
+  %add2 = add i32 %add, %c.0.lcssa
+  ret i32 %add2
+}
+
+; Some variants once cause crash
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @PopCntCrash1(i64 %a) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i64 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %t = add i32 %c.05, %c.05
+  %inc = add nsw i32 %t, 1
+  %sub = add i64 %a.addr.04, -1
+  %and = and i64 %sub, %a.addr.04
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+
+; CHECK: entry
+; CHECK: ret 
+}
+
+define i32 @PopCntCrash2(i64 %a, i32 %b) nounwind uwtable readnone ssp {
+entry:
+  %tobool3 = icmp eq i64 %a, 0
+  br i1 %tobool3, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %c.05 = phi i32 [ %inc, %while.body ], [ %b, %entry ]
+  %a.addr.04 = phi i64 [ %and, %while.body ], [ %a, %entry ]
+  %inc = add nsw i32 %c.05, 1
+  %sub = add i64 %a.addr.04, -1
+  %and = and i64 %sub, %a.addr.04
+  %tobool = icmp eq i64 %and, 0
+  br i1 %tobool, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  ret i32 %c.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index c34fd72a46..f1bf6cb6d8 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -89,9 +89,8 @@ define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
   ret void
 }
 
-; We can't vectorize this loop because it has non constant loop bounds.
 ;CHECK: @example3
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
 ;CHECK: ret void
 define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
   %1 = icmp eq i32 %n, 0
@@ -537,9 +536,8 @@ define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocaptu
   ret void
 }
 
-; Can't vectorize because the src and dst pointers are not disjoint.
 ;CHECK: @example21
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
 ;CHECK: ret i32
 define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
   %1 = icmp sgt i32 %n, 0
diff --git a/test/Transforms/LoopVectorize/if-conversion-reduction.ll b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
new file mode 100644
index 0000000000..bacf9c00d0
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conversion-reduction.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -enable-if-conversion -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+;CHECK: @reduction_func
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+entry:
+  %cmp10 = icmp sgt i32 %n, 0
+  br i1 %cmp10, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 30
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %add = add i32 %sum.011, 2
+  %add4 = add i32 %add, %0
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %sum.1 = phi i32 [ %add4, %if.then ], [ %sum.011, %for.body ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ 4, %for.inc ]
+  ret i32 %sum.0.lcssa
+}
+
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
new file mode 100644
index 0000000000..b4701b9655
--- /dev/null
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -0,0 +1,108 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -enable-if-conversion -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; This is the loop in this example:
+;
+;int function0(int *a, int *b, int start, int end) {
+;
+;  for (int i=start; i<end; ++i) {
+;    unsigned k = a[i];
+;
+;    if (a[i] > b[i])   <------ notice the IF inside the loop.
+;      k = k * 5 + 3;
+;
+;    a[i] = k;  <---- K is a phi node that becomes vector-select.
+;  }
+;}
+
+;CHECK: @function0
+;CHECK: load <4 x i32>
+;CHECK: icmp sgt <4 x i32>
+;CHECK: mul <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+define i32 @function0(i32* nocapture %a, i32* nocapture %b, i32 %start, i32 %end) nounwind uwtable ssp {
+entry:
+  %cmp16 = icmp slt i32 %start, %end
+  br i1 %cmp16, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:
+  %0 = sext i32 %start to i64
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %if.end ]
+  %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+  %1 = load i32* %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds i32* %b, i64 %indvars.iv
+  %2 = load i32* %arrayidx4, align 4
+  %cmp5 = icmp sgt i32 %1, %2
+  br i1 %cmp5, label %if.then, label %if.end
+
+if.then:
+  %mul = mul i32 %1, 5
+  %add = add i32 %mul, 3
+  br label %if.end
+
+if.end:
+  %k.0 = phi i32 [ %add, %if.then ], [ %1, %for.body ]
+  store i32 %k.0, i32* %arrayidx, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %3 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp slt i32 %3, %end
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+  ret i32 undef
+}
+
+
+
+; int func(int *A, int n) {
+;   unsigned sum = 0;
+;   for (int i = 0; i < n; ++i)
+;     if (A[i] > 30)
+;       sum += A[i] + 2;
+;
+;   return sum;
+; }
+
+;CHECK: @reduction_func
+;CHECK: load <4 x i32>
+;CHECK: icmp sgt <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: select <4 x i1>
+;CHECK: ret i32
+define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
+entry:
+  %cmp10 = icmp sgt i32 %n, 0
+  br i1 %cmp10, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+  %sum.011 = phi i32 [ %sum.1, %for.inc ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+  %0 = load i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %0, 30
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %add = add i32 %sum.011, 2
+  %add4 = add i32 %add, %0
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %sum.1 = phi i32 [ %add4, %if.then ], [ %sum.011, %for.body ]
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.inc, %entry
+  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %sum.1, %for.inc ]
+  ret i32 %sum.0.lcssa
+}
+
diff --git a/test/Transforms/LoopVectorize/intrinsic.ll b/test/Transforms/LoopVectorize/intrinsic.ll
new file mode 100644
index 0000000000..54e3c69fe1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/intrinsic.ll
@@ -0,0 +1,851 @@
+; RUN: opt < %s  -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;CHECK: @sqrt_f32
+;CHECK: llvm.sqrt.v4f32
+;CHECK: ret void
+define void @sqrt_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.sqrt.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.sqrt.f32(float) nounwind readnone
+
+;CHECK: @sqrt_f64
+;CHECK: llvm.sqrt.v4f64
+;CHECK: ret void
+define void @sqrt_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.sqrt.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.sqrt.f64(double) nounwind readnone
+
+;CHECK: @sin_f32
+;CHECK: llvm.sin.v4f32
+;CHECK: ret void
+define void @sin_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.sin.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.sin.f32(float) nounwind readnone
+
+;CHECK: @sin_f64
+;CHECK: llvm.sin.v4f64
+;CHECK: ret void
+define void @sin_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.sin.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.sin.f64(double) nounwind readnone
+
+;CHECK: @cos_f32
+;CHECK: llvm.cos.v4f32
+;CHECK: ret void
+define void @cos_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.cos.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.cos.f32(float) nounwind readnone
+
+;CHECK: @cos_f64
+;CHECK: llvm.cos.v4f64
+;CHECK: ret void
+define void @cos_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.cos.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.cos.f64(double) nounwind readnone
+
+;CHECK: @exp_f32
+;CHECK: llvm.exp.v4f32
+;CHECK: ret void
+define void @exp_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.exp.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.exp.f32(float) nounwind readnone
+
+;CHECK: @exp_f64
+;CHECK: llvm.exp.v4f64
+;CHECK: ret void
+define void @exp_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.exp.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.exp.f64(double) nounwind readnone
+
+;CHECK: @exp2_f32
+;CHECK: llvm.exp2.v4f32
+;CHECK: ret void
+define void @exp2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.exp2.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.exp2.f32(float) nounwind readnone
+
+;CHECK: @exp2_f64
+;CHECK: llvm.exp2.v4f64
+;CHECK: ret void
+define void @exp2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.exp2.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.exp2.f64(double) nounwind readnone
+
+;CHECK: @log_f32
+;CHECK: llvm.log.v4f32
+;CHECK: ret void
+define void @log_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.log.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.log.f32(float) nounwind readnone
+
+;CHECK: @log_f64
+;CHECK: llvm.log.v4f64
+;CHECK: ret void
+define void @log_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.log.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.log.f64(double) nounwind readnone
+
+;CHECK: @log10_f32
+;CHECK: llvm.log10.v4f32
+;CHECK: ret void
+define void @log10_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.log10.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.log10.f32(float) nounwind readnone
+
+;CHECK: @log10_f64
+;CHECK: llvm.log10.v4f64
+;CHECK: ret void
+define void @log10_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.log10.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.log10.f64(double) nounwind readnone
+
+;CHECK: @log2_f32
+;CHECK: llvm.log2.v4f32
+;CHECK: ret void
+define void @log2_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.log2.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.log2.f32(float) nounwind readnone
+
+;CHECK: @log2_f64
+;CHECK: llvm.log2.v4f64
+;CHECK: ret void
+define void @log2_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.log2.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.log2.f64(double) nounwind readnone
+
+;CHECK: @fabs_f32
+;CHECK: llvm.fabs.v4f32
+;CHECK: ret void
+define void @fabs_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.fabs.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.fabs.f32(float) nounwind readnone
+
+define void @fabs_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.fabs(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.fabs(double) nounwind readnone
+
+;CHECK: @floor_f32
+;CHECK: llvm.floor.v4f32
+;CHECK: ret void
+define void @floor_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.floor.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.floor.f32(float) nounwind readnone
+
+;CHECK: @floor_f64
+;CHECK: llvm.floor.v4f64
+;CHECK: ret void
+define void @floor_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.floor.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.floor.f64(double) nounwind readnone
+
+;CHECK: @ceil_f32
+;CHECK: llvm.ceil.v4f32
+;CHECK: ret void
+define void @ceil_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.ceil.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.ceil.f32(float) nounwind readnone
+
+;CHECK: @ceil_f64
+;CHECK: llvm.ceil.v4f64
+;CHECK: ret void
+define void @ceil_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.ceil.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.ceil.f64(double) nounwind readnone
+
+;CHECK: @trunc_f32
+;CHECK: llvm.trunc.v4f32
+;CHECK: ret void
+define void @trunc_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.trunc.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.trunc.f32(float) nounwind readnone
+
+;CHECK: @trunc_f64
+;CHECK: llvm.trunc.v4f64
+;CHECK: ret void
+define void @trunc_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.trunc.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.trunc.f64(double) nounwind readnone
+
+;CHECK: @rint_f32
+;CHECK: llvm.rint.v4f32
+;CHECK: ret void
+define void @rint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.rint.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.rint.f32(float) nounwind readnone
+
+;CHECK: @rint_f64
+;CHECK: llvm.rint.v4f64
+;CHECK: ret void
+define void @rint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.rint.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.rint.f64(double) nounwind readnone
+
+;CHECK: @nearbyint_f32
+;CHECK: llvm.nearbyint.v4f32
+;CHECK: ret void
+define void @nearbyint_f32(i32 %n, float* noalias %y, float* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %call = tail call float @llvm.nearbyint.f32(float %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx2, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.nearbyint.f32(float) nounwind readnone
+
+;CHECK: @nearbyint_f64
+;CHECK: llvm.nearbyint.v4f64
+;CHECK: ret void
+define void @nearbyint_f64(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable {
+entry:
+  %cmp6 = icmp sgt i32 %n, 0
+  br i1 %cmp6, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %call = tail call double @llvm.nearbyint.f64(double %0) nounwind readnone
+  %arrayidx2 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx2, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.nearbyint.f64(double) nounwind readnone
+
+;CHECK: @fma_f32
+;CHECK: llvm.fma.v4f32
+;CHECK: ret void
+define void @fma_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z, float* noalias %w) nounwind uwtable {
+entry:
+  %cmp12 = icmp sgt i32 %n, 0
+  br i1 %cmp12, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds float* %w, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %arrayidx4 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %2 = load float* %arrayidx4, align 4, !tbaa !0
+  %3 = tail call float @llvm.fma.f32(float %0, float %2, float %1)
+  %arrayidx6 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %3, float* %arrayidx6, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+
+;CHECK: @fma_f64
+;CHECK: llvm.fma.v4f64
+;CHECK: ret void
+define void @fma_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z, double* noalias %w) nounwind uwtable {
+entry:
+  %cmp12 = icmp sgt i32 %n, 0
+  br i1 %cmp12, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %arrayidx2 = getelementptr inbounds double* %w, i64 %indvars.iv
+  %1 = load double* %arrayidx2, align 8, !tbaa !3
+  %arrayidx4 = getelementptr inbounds double* %z, i64 %indvars.iv
+  %2 = load double* %arrayidx4, align 8, !tbaa !3
+  %3 = tail call double @llvm.fma.f64(double %0, double %2, double %1)
+  %arrayidx6 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %3, double* %arrayidx6, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+
+;CHECK: @pow_f32
+;CHECK: llvm.pow.v4f32
+;CHECK: ret void
+define void @pow_f32(i32 %n, float* noalias %y, float* noalias %x, float* noalias %z) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float* %y, i64 %indvars.iv
+  %0 = load float* %arrayidx, align 4, !tbaa !0
+  %arrayidx2 = getelementptr inbounds float* %z, i64 %indvars.iv
+  %1 = load float* %arrayidx2, align 4, !tbaa !0
+  %call = tail call float @llvm.pow.f32(float %0, float %1) nounwind readnone
+  %arrayidx4 = getelementptr inbounds float* %x, i64 %indvars.iv
+  store float %call, float* %arrayidx4, align 4, !tbaa !0
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare float @llvm.pow.f32(float, float) nounwind readnone
+
+;CHECK: @pow_f64
+;CHECK: llvm.pow.v4f64
+;CHECK: ret void
+define void @pow_f64(i32 %n, double* noalias %y, double* noalias %x, double* noalias %z) nounwind uwtable {
+entry:
+  %cmp9 = icmp sgt i32 %n, 0
+  br i1 %cmp9, label %for.body, label %for.end
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv
+  %0 = load double* %arrayidx, align 8, !tbaa !3
+  %arrayidx2 = getelementptr inbounds double* %z, i64 %indvars.iv
+  %1 = load double* %arrayidx2, align 8, !tbaa !3
+  %call = tail call double @llvm.pow.f64(double %0, double %1) nounwind readnone
+  %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv
+  store double %call, double* %arrayidx4, align 8, !tbaa !3
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, %n
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+}
+
+declare double @llvm.pow.f64(double, double) nounwind readnone
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"double", metadata !1}
+!4 = metadata !{metadata !"int", metadata !1}
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index b363eefb3f..9fe926ee2c 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1134,3 +1134,45 @@ entry:
   ret void
 ; CHECK: ret
 }
+
+define void @PR14465() {
+; Ensure that we don't crash when analyzing a alloca larger than the maximum
+; integer type width (MAX_INT_BITS) supported by llvm (1048576*32 > (1<<23)-1).
+; CHECK: @PR14465
+
+  %stack = alloca [1048576 x i32], align 16
+; CHECK: alloca [1048576 x i32]
+  %cast = bitcast [1048576 x i32]* %stack to i8*
+  call void @llvm.memset.p0i8.i64(i8* %cast, i8 -2, i64 4194304, i32 16, i1 false)
+  ret void
+; CHECK: ret
+}
+
+define void @PR14548(i1 %x) {
+; Handle a mixture of i1 and i8 loads and stores to allocas. This particular
+; pattern caused crashes and invalid output in the PR, and its nature will
+; trigger a mixture in several permutations as we resolve each alloca
+; iteratively.
+; Note that we don't do a particularly good *job* of handling these mixtures,
+; but the hope is that this is very rare.
+; CHECK: @PR14548
+
+entry:
+  %a = alloca <{ i1 }>, align 8
+  %b = alloca <{ i1 }>, align 8
+; Nothing of interest is simplified here.
+; CHECK: alloca
+; CHECK: alloca
+
+  %b.i1 = bitcast <{ i1 }>* %b to i1*
+  store i1 %x, i1* %b.i1, align 8
+  %b.i8 = bitcast <{ i1 }>* %b to i8*
+  %foo = load i8* %b.i8, align 1
+
+  %a.i8 = bitcast <{ i1 }>* %a to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind
+  %bar = load i8* %a.i8, align 1
+  %a.i1 = getelementptr inbounds <{ i1 }>* %a, i32 0, i32 0
+  %baz = load i1* %a.i1, align 1
+  ret void
+}
diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll
index ce82d1f30b..1ac6d25d63 100644
--- a/test/Transforms/SROA/big-endian.ll
+++ b/test/Transforms/SROA/big-endian.ll
@@ -82,14 +82,9 @@ entry:
 
   %a0i16ptr = bitcast i8* %a0ptr to i16*
   store i16 1, i16* %a0i16ptr
-; CHECK:      %[[mask0:.*]] = and i16 1, -16
-
-  %a1i4ptr = bitcast i8* %a1ptr to i4*
-  store i4 1, i4* %a1i4ptr
-; CHECK-NEXT: %[[insert0:.*]] = or i16 %[[mask0]], 1
 
   store i8 1, i8* %a2ptr
-; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, 4294967295
+; CHECK:      %[[mask1:.*]] = and i40 undef, 4294967295
 ; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], 4294967296
 
   %a3i24ptr = bitcast i8* %a3ptr to i24*
@@ -110,7 +105,7 @@ entry:
   %ai = load i56* %aiptr
   %ret = zext i56 %ai to i64
   ret i64 %ret
-; CHECK-NEXT: %[[ext4:.*]] = zext i16 %[[insert0]] to i56
+; CHECK-NEXT: %[[ext4:.*]] = zext i16 1 to i56
 ; CHECK-NEXT: %[[shift4:.*]] = shl i56 %[[ext4]], 40
 ; CHECK-NEXT: %[[mask4:.*]] = and i56 %[[insert3]], 1099511627775
 ; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[shift4]]
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
index f1e118955d..bb34e3f084 100644
--- a/test/Transforms/SROA/vector-promotion.ll
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -295,7 +295,7 @@ entry:
 }
 
 define <2 x i8> @PR14349.1(i32 %x) {
-; CEHCK: @PR14349.1
+; CHECK: @PR14349.1
 ; The first testcase for broken SROA rewriting of split integer loads and
 ; stores due to smaller vector loads and stores. This particular test ensures
 ; that we can rewrite a split store of an integer to a store of a vector.
@@ -317,7 +317,7 @@ entry:
 }
 
 define i32 @PR14349.2(<2 x i8> %x) {
-; CEHCK: @PR14349.2
+; CHECK: @PR14349.2
 ; The first testcase for broken SROA rewriting of split integer loads and
 ; stores due to smaller vector loads and stores. This particular test ensures
 ; that we can rewrite a split load of an integer to a load of a vector.
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 8a59992f5e..5f70465c64 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -777,3 +777,29 @@ return:
 ; CHECK: switch.lookup:
 ; CHECK: getelementptr inbounds [5 x i32]* @switch.table6, i32 0, i32 %switch.tableidx
 }
+
+; Don't create a table with illegal type
+; rdar://12779436
+define i96 @illegaltype(i32 %c) {
+entry:
+  switch i32 %c, label %sw.default [
+    i32 42, label %return
+    i32 43, label %sw.bb1
+    i32 44, label %sw.bb2
+    i32 45, label %sw.bb3
+    i32 46, label %sw.bb4
+  ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.default: br label %return
+return:
+  %retval.0 = phi i96 [ 15, %sw.default ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+  ret i96 %retval.0
+
+; CHECK: @illegaltype
+; CHECK-NOT: @switch.table
+; CHECK: switch i32 %c
+}
diff --git a/test/Transforms/SimplifyLibCalls/FPrintF.ll b/test/Transforms/SimplifyLibCalls/FPrintF.ll
deleted file mode 100644
index 51733e4a1e..0000000000
--- a/test/Transforms/SimplifyLibCalls/FPrintF.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; Test that the FPrintFOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep "call.*fprintf"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
-	%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
-@str = constant [3 x i8] c"%s\00"		; <[3 x i8]*> [#uses=1]
-@chr = constant [3 x i8] c"%c\00"		; <[3 x i8]*> [#uses=1]
-@hello = constant [13 x i8] c"hello world\0A\00"		; <[13 x i8]*> [#uses=1]
-@stdout = external global %struct._IO_FILE*		; <%struct._IO_FILE**> [#uses=3]
-
-declare i32 @fprintf(%struct._IO_FILE*, i8*, ...)
-
-define i32 @foo() {
-entry:
-	%tmp.1 = load %struct._IO_FILE** @stdout		; <%struct._IO_FILE*> [#uses=1]
-	%tmp.0 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp.1, i8* getelementptr ([13 x i8]* @hello, i32 0, i32 0) )		; <i32> [#uses=0]
-	%tmp.4 = load %struct._IO_FILE** @stdout		; <%struct._IO_FILE*> [#uses=1]
-	%tmp.3 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp.4, i8* getelementptr ([3 x i8]* @str, i32 0, i32 0), i8* getelementptr ([13 x i8]* @hello, i32 0, i32 0) )		; <i32> [#uses=0]
-	%tmp.8 = load %struct._IO_FILE** @stdout		; <%struct._IO_FILE*> [#uses=1]
-	%tmp.7 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf( %struct._IO_FILE* %tmp.8, i8* getelementptr ([3 x i8]* @chr, i32 0, i32 0), i32 33 )		; <i32> [#uses=0]
-	ret i32 0
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/FPuts.ll b/test/Transforms/SimplifyLibCalls/FPuts.ll
deleted file mode 100644
index aa01aba265..0000000000
--- a/test/Transforms/SimplifyLibCalls/FPuts.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; Test that the FPutsOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep "call.*fputs"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-	%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
-	%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
-@stdout = external global %struct._IO_FILE*		; <%struct._IO_FILE**> [#uses=1]
-@empty = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
-@len1 = constant [2 x i8] c"A\00"		; <[2 x i8]*> [#uses=1]
-@long = constant [7 x i8] c"hello\0A\00"		; <[7 x i8]*> [#uses=1]
-
-declare i32 @fputs(i8*, %struct._IO_FILE*)
-
-define i32 @main() {
-entry:
-	%out = load %struct._IO_FILE** @stdout		; <%struct._IO_FILE*> [#uses=3]
-	%s1 = getelementptr [1 x i8]* @empty, i32 0, i32 0		; <i8*> [#uses=1]
-	%s2 = getelementptr [2 x i8]* @len1, i32 0, i32 0		; <i8*> [#uses=1]
-	%s3 = getelementptr [7 x i8]* @long, i32 0, i32 0		; <i8*> [#uses=1]
-	%a = call i32 @fputs( i8* %s1, %struct._IO_FILE* %out )		; <i32> [#uses=0]
-	%b = call i32 @fputs( i8* %s2, %struct._IO_FILE* %out )		; <i32> [#uses=0]
-	%c = call i32 @fputs( i8* %s3, %struct._IO_FILE* %out )		; <i32> [#uses=0]
-	ret i32 0
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/Puts.ll b/test/Transforms/SimplifyLibCalls/Puts.ll
deleted file mode 100644
index aa68904810..0000000000
--- a/test/Transforms/SimplifyLibCalls/Puts.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; Test that the PutsOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "p:64:64:64"
-
-@.str = private constant [1 x i8] zeroinitializer
-
-declare i32 @puts(i8*)
-
-define void @foo() {
-entry:
-; CHECK: call i32 @putchar(i32 10)
-  %call = call i32 @puts(i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0))
-  ret void
-}
diff --git a/test/Transforms/SimplifyLibCalls/SPrintF.ll b/test/Transforms/SimplifyLibCalls/SPrintF.ll
deleted file mode 100644
index 514a7d9f6e..0000000000
--- a/test/Transforms/SimplifyLibCalls/SPrintF.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; Test that the SPrintFOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep "call.*sprintf"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [6 x i8] c"hello\00"		; <[6 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
-@null_hello = constant [7 x i8] c"\00hello\00"		; <[7 x i8]*> [#uses=1]
-@fmt1 = constant [3 x i8] c"%s\00"		; <[3 x i8]*> [#uses=1]
-@fmt2 = constant [3 x i8] c"%c\00"		; <[3 x i8]*> [#uses=1]
-
-declare i32 @sprintf(i8*, i8*, ...)
-
-declare i32 @puts(i8*)
-
-define i32 @foo(i8* %p) {
-	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
-	%target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=7]
-	%hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0		; <i8*> [#uses=2]
-	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0		; <i8*> [#uses=1]
-	%nh_p = getelementptr [7 x i8]* @null_hello, i32 0, i32 0		; <i8*> [#uses=1]
-	%fmt1_p = getelementptr [3 x i8]* @fmt1, i32 0, i32 0		; <i8*> [#uses=2]
-	%fmt2_p = getelementptr [3 x i8]* @fmt2, i32 0, i32 0		; <i8*> [#uses=1]
-	store i8 0, i8* %target_p
-	%r1 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %hello_p )		; <i32> [#uses=1]
-	%r2 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %null_p )		; <i32> [#uses=1]
-	%r3 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %nh_p )		; <i32> [#uses=1]
-	%r4 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %fmt1_p, i8* %hello_p )		; <i32> [#uses=1]
-	%r4.1 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %fmt1_p, i8* %p )		; <i32> [#uses=1]
-	%r5 = call i32 (i8*, i8*, ...)* @sprintf( i8* %target_p, i8* %fmt2_p, i32 82 )		; <i32> [#uses=1]
-	%r6 = add i32 %r1, %r2		; <i32> [#uses=1]
-	%r7 = add i32 %r3, %r6		; <i32> [#uses=1]
-	%r8 = add i32 %r5, %r7		; <i32> [#uses=1]
-	%r9 = add i32 %r8, %r4		; <i32> [#uses=1]
-	%r10 = add i32 %r9, %r4.1		; <i32> [#uses=1]
-	ret i32 %r10
-}
diff --git a/test/Transforms/SimplifyLibCalls/fwrite.ll b/test/Transforms/SimplifyLibCalls/fwrite.ll
deleted file mode 100644
index f0f3dcaac6..0000000000
--- a/test/Transforms/SimplifyLibCalls/fwrite.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-%FILE = type { i32 }
-
-@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1
-
-define i64 @foo(%FILE* %f) {
-; CHECK: %retval = call i64 @fwrite
-  %retval = call i64 @fwrite(i8* getelementptr inbounds ([1 x i8]* @.str, i64 0, i64 0), i64 1, i64 1, %FILE* %f)
-  ret i64 %retval
-}
-
-declare i64 @fwrite(i8*, i64, i64, %FILE *)
diff --git a/test/Transforms/SimplifyLibCalls/iprintf.ll b/test/Transforms/SimplifyLibCalls/iprintf.ll
deleted file mode 100644
index d6a7074db1..0000000000
--- a/test/Transforms/SimplifyLibCalls/iprintf.ll
+++ /dev/null
@@ -1,50 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S -o %t
-; RUN: FileCheck < %t %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32"
-target triple = "xcore-xmos-elf"
-
-@.str = internal constant [4 x i8] c"%f\0A\00"		; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [4 x i8] c"%d\0A\00"		; <[4 x i8]*> [#uses=1]
-
-; Verify sprintf with no floating point arguments is transformed to siprintf
-define i32 @f2(i8* %p, i32 %x) nounwind {
-entry:
-; CHECK: define i32 @f2
-; CHECK: @siprintf
-; CHECK: }
-	%0 = tail call i32 (i8*, i8*, ...)* @sprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str1, i32 0, i32 0), i32 %x)
-	ret i32 %0
-}
-
-; Verify we don't turn this into an siprintf call
-define i32 @f3(i8* %p, double %x) nounwind {
-entry:
-; CHECK: define i32 @f3
-; CHECK: @sprintf
-; CHECK: }
-	%0 = tail call i32 (i8*, i8*, ...)* @sprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), double %x)
-	ret i32 %0
-}
-
-; Verify fprintf with no floating point arguments is transformed to fiprintf
-define i32 @f4(i8* %p, i32 %x) nounwind {
-entry:
-; CHECK: define i32 @f4
-; CHECK: @fiprintf
-; CHECK: }
-	%0 = tail call i32 (i8*, i8*, ...)* @fprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str1, i32 0, i32 0), i32 %x)
-	ret i32 %0
-}
-
-; Verify we don't turn this into an fiprintf call
-define i32 @f5(i8* %p, double %x) nounwind {
-entry:
-; CHECK: define i32 @f5
-; CHECK: @fprintf
-; CHECK: }
-	%0 = tail call i32 (i8*, i8*, ...)* @fprintf(i8 *%p, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), double %x)
-	ret i32 %0
-}
-
-declare i32 @sprintf(i8* nocapture, i8* nocapture, ...) nounwind
-declare i32 @fprintf(i8* nocapture, i8* nocapture, ...) nounwind
diff --git a/test/tools/llvm-lit/chain.c b/test/tools/llvm-lit/chain.c
new file mode 100644
index 0000000000..6f6541d2e4
--- /dev/null
+++ b/test/tools/llvm-lit/chain.c
@@ -0,0 +1,9 @@
+// This test should fail. lit used to interpret this as:
+//   (false && false) || true
+// instead of the intended
+//   false && (false || true
+//
+// RUN: false
+// RUN: false || true
+//
+// XFAIL: *
diff --git a/test/tools/llvm-lit/lit.local.cfg b/test/tools/llvm-lit/lit.local.cfg
new file mode 100644
index 0000000000..856a54932f
--- /dev/null
+++ b/test/tools/llvm-lit/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.c']
diff --git a/test/tools/llvm-objdump/win64-unwind-data.s b/test/tools/llvm-objdump/win64-unwind-data.s
new file mode 100644
index 0000000000..1e4c7428ce
--- /dev/null
+++ b/test/tools/llvm-objdump/win64-unwind-data.s
@@ -0,0 +1,106 @@
+// This test checks that the unwind data is dumped by llvm-objdump.
+// RUN: llvm-mc -triple x86_64-pc-win32 -filetype=obj %s | llvm-objdump -u - | FileCheck %s
+
+// CHECK:      Unwind info:
+// CHECK:      Function Table:
+// CHECK-NEXT: Start Address: .text
+// CHECK-NEXT: End Address: .text + 0x001b
+// CHECK-NEXT: Unwind Info Address: .xdata
+// CHECK-NEXT: Version: 1
+// CHECK-NEXT: Flags: 1 UNW_ExceptionHandler
+// CHECK-NEXT: Size of prolog: 18
+// CHECK-NEXT: Number of Codes: 8
+// CHECK-NEXT: Frame register: RBX
+// CHECK-NEXT: Frame offset: 0
+// CHECK-NEXT: Unwind Codes:
+// CHECK-NEXT: 0x00: UOP_SetFPReg
+// CHECK-NEXT: 0x0f: UOP_PushNonVol RBX
+// CHECK-NEXT: 0x0e: UOP_SaveXMM128 XMM8 [0x0000]
+// CHECK-NEXT: 0x09: UOP_SaveNonVol RSI [0x0010]
+// CHECK-NEXT: 0x04: UOP_AllocSmall 24
+// CHECK-NEXT: 0x00: UOP_PushMachFrame w/o error code
+// CHECK:      Function Table:
+// CHECK-NEXT: Start Address: .text + 0x0012
+// CHECK-NEXT: End Address: .text + 0x0012
+// CHECK-NEXT: Unwind Info Address: .xdata + 0x001c
+// CHECK-NEXT: Version: 1
+// CHECK-NEXT: Flags: 4 UNW_ChainInfo
+// CHECK-NEXT: Size of prolog: 0
+// CHECK-NEXT: Number of Codes: 0
+// CHECK-NEXT: No frame pointer used
+// CHECK:      Function Table:
+// CHECK-NEXT: Start Address: .text + 0x001b
+// CHECK-NEXT: End Address: .text + 0x001c
+// CHECK-NEXT: Unwind Info Address: .xdata + 0x002c
+// CHECK-NEXT: Version: 1
+// CHECK-NEXT: Flags: 0
+// CHECK-NEXT: Size of prolog: 0
+// CHECK-NEXT: Number of Codes: 0
+// CHECK-NEXT: No frame pointer used
+// CHECK:      Function Table:
+// CHECK-NEXT: Start Address: .text + 0x001c
+// CHECK-NEXT: End Address: .text + 0x0039
+// CHECK-NEXT: Unwind Info Address: .xdata + 0x0034
+// CHECK-NEXT: Version: 1
+// CHECK-NEXT: Flags: 0
+// CHECK-NEXT: Size of prolog: 14
+// CHECK-NEXT: Number of Codes: 6
+// CHECK-NEXT: No frame pointer used
+// CHECK-NEXT: Unwind Codes:
+// CHECK-NEXT: 0x0e: UOP_AllocLarge 8454128
+// CHECK-NEXT: 0x07: UOP_AllocLarge 8190
+// CHECK-NEXT: 0x00: UOP_PushMachFrame w/o error code
+
+    .text
+    .globl func
+    .def func; .scl 2; .type 32; .endef
+    .seh_proc func
+func:
+    .seh_pushframe @code
+    subq $24, %rsp
+    .seh_stackalloc 24
+    movq %rsi, 16(%rsp)
+    .seh_savereg %rsi, 16
+    movups %xmm8, (%rsp)
+    .seh_savexmm %xmm8, 0
+    pushq %rbx
+    .seh_pushreg 3
+    mov %rsp, %rbx
+    .seh_setframe 3, 0
+    .seh_endprologue
+    .seh_handler __C_specific_handler, @except
+    .seh_handlerdata
+    .long 0
+    .text
+    .seh_startchained
+    .seh_endprologue
+    .seh_endchained
+    lea (%rbx), %rsp
+    pop %rbx
+    addq $24, %rsp
+    ret
+    .seh_endproc
+
+// Test emission of small functions.
+    .globl smallFunc
+    .def smallFunc; .scl 2; .type 32; .endef
+    .seh_proc smallFunc
+smallFunc:
+    ret
+    .seh_endproc
+
+// Function with big stack allocation.
+    .globl smallFunc
+    .def allocFunc; .scl 2; .type 32; .endef
+    .seh_proc smallFunc
+allocFunc:
+    .seh_pushframe @code
+    subq $65520, %rsp
+    .seh_stackalloc 65520
+    sub $8454128, %rsp
+    .seh_stackalloc 8454128
+    .seh_endprologue
+    add $8454128, %rsp
+    addq $65520, %rsp
+    ret
+    .seh_endproc
diff --git a/tools/Makefile b/tools/Makefile
index 69f42d9495..cc1eea144f 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -33,10 +33,15 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \
                  lli llvm-extract llvm-mc \
                  bugpoint llvm-bcanalyzer \
                  llvm-diff macho-dump llvm-objdump llvm-readobj \
-	         llvm-rtdyld llvm-dwarfdump llvm-cov llvm-jitlistener \
+	         llvm-rtdyld llvm-dwarfdump llvm-cov \
 	         llvm-size llvm-stress llvm-mcmarkup bc-wrap pso-stub \
 	         llvm-symbolizer
 
+# If Intel JIT Events support is configured, build an extra tool to test it.
+ifeq ($(USE_INTEL_JITEVENTS), 1)
+  PARALLEL_DIRS += llvm-jitlistener
+endif
+
 # Let users override the set of tools to build from the command line.
 ifdef ONLY_TOOLS
   OPTIONAL_PARALLEL_DIRS :=
diff --git a/tools/bugpoint-passes/TestPasses.cpp b/tools/bugpoint-passes/TestPasses.cpp
index 1535b03885..835c397b51 100644
--- a/tools/bugpoint-passes/TestPasses.cpp
+++ b/tools/bugpoint-passes/TestPasses.cpp
@@ -14,10 +14,10 @@
 
 #include "llvm/BasicBlock.h"
 #include "llvm/Constant.h"
+#include "llvm/InstVisitor.h"
 #include "llvm/Instructions.h"
 #include "llvm/Pass.h"
 #include "llvm/Type.h"
-#include "llvm/Support/InstVisitor.h"
 
 using namespace llvm;
 
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
index 21636eabab..d396f7f5ad 100644
--- a/tools/bugpoint/BugDriver.cpp
+++ b/tools/bugpoint/BugDriver.cpp
@@ -18,12 +18,12 @@
 #include "llvm/Linker.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Support/IRReader.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/IRReader.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Host.h"
 #include <memory>
 using namespace llvm;
 
diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h
index cc78489e3d..2b621ecc1c 100644
--- a/tools/bugpoint/BugDriver.h
+++ b/tools/bugpoint/BugDriver.h
@@ -18,8 +18,8 @@
 
 #include "llvm/ADT/ValueMap.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
-#include <vector>
 #include <string>
+#include <vector>
 
 namespace llvm {
 
diff --git a/tools/bugpoint/CrashDebugger.cpp b/tools/bugpoint/CrashDebugger.cpp
index 8836eedb47..80e746c518 100644
--- a/tools/bugpoint/CrashDebugger.cpp
+++ b/tools/bugpoint/CrashDebugger.cpp
@@ -12,22 +12,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "BugDriver.h"
-#include "ToolRunner.h"
 #include "ListReducer.h"
+#include "ToolRunner.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/Verifier.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/PassManager.h"
-#include "llvm/ValueSymbolTable.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/Verifier.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileUtilities.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/ValueSymbolTable.h"
 #include <set>
 using namespace llvm;
 
@@ -412,9 +412,7 @@ bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
   // Verify that this is still valid.
   PassManager Passes;
   Passes.add(createVerifierPass());
-  Passes.doInitialization();
   Passes.run(*M);
-  Passes.doFinalization();
 
   // Try running on the hacked up program...
   if (TestFn(BD, M)) {
diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp
index b40b4f10db..805e019c9b 100644
--- a/tools/bugpoint/ExtractFunction.cpp
+++ b/tools/bugpoint/ExtractFunction.cpp
@@ -13,25 +13,25 @@
 //===----------------------------------------------------------------------===//
 
 #include "BugDriver.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/Writer.h"
 #include "llvm/Constants.h"
 #include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/PassManager.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/Transforms/Utils/CodeExtractor.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/CodeExtractor.h"
 #include <set>
 using namespace llvm;
 
diff --git a/tools/bugpoint/ListReducer.h b/tools/bugpoint/ListReducer.h
index bd1c5da65c..8083e2d65f 100644
--- a/tools/bugpoint/ListReducer.h
+++ b/tools/bugpoint/ListReducer.h
@@ -15,11 +15,11 @@
 #ifndef BUGPOINT_LIST_REDUCER_H
 #define BUGPOINT_LIST_REDUCER_H
 
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/ErrorHandling.h"
-#include <vector>
-#include <cstdlib>
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
+#include <cstdlib>
+#include <vector>
 
 namespace llvm {
   
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index 82a3a862a2..7b3d709c05 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -15,17 +15,17 @@
 #include "BugDriver.h"
 #include "ListReducer.h"
 #include "ToolRunner.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Config/config.h"   // for HAVE_LINK_R
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Instructions.h"
 #include "llvm/Linker.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileUtilities.h"
-#include "llvm/Config/config.h"   // for HAVE_LINK_R
+#include "llvm/Transforms/Utils/Cloning.h"
 using namespace llvm;
 
 namespace llvm {
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
index c56911a32e..3d65886398 100644
--- a/tools/bugpoint/OptimizerDriver.cpp
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -16,18 +16,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "BugDriver.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/DataLayout.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/ToolOutputFile.h"
 
 #define DONT_GET_PLUGIN_LOADER_OPTION
 #include "llvm/Support/PluginLoader.h"
diff --git a/tools/bugpoint/ToolRunner.cpp b/tools/bugpoint/ToolRunner.cpp
index d975d68d96..e7d3978e26 100644
--- a/tools/bugpoint/ToolRunner.cpp
+++ b/tools/bugpoint/ToolRunner.cpp
@@ -13,12 +13,12 @@
 
 #define DEBUG_TYPE "toolrunner"
 #include "ToolRunner.h"
-#include "llvm/Support/Program.h"
+#include "llvm/Config/config.h"   // for HAVE_LINK_R
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/Program.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Config/config.h"   // for HAVE_LINK_R
 #include <fstream>
 #include <sstream>
 using namespace llvm;
diff --git a/tools/bugpoint/ToolRunner.h b/tools/bugpoint/ToolRunner.h
index 7b93394fd8..bb83ce459e 100644
--- a/tools/bugpoint/ToolRunner.h
+++ b/tools/bugpoint/ToolRunner.h
@@ -20,8 +20,8 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/SystemUtils.h"
 #include <exception>
 #include <vector>
 
diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp
index 8f15b026a5..efc656553b 100644
--- a/tools/bugpoint/bugpoint.cpp
+++ b/tools/bugpoint/bugpoint.cpp
@@ -15,18 +15,18 @@
 
 #include "BugDriver.h"
 #include "ToolRunner.h"
-#include "llvm/LinkAllPasses.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/LinkAllPasses.h"
+#include "llvm/LinkAllVMCore.h"
 #include "llvm/PassManager.h"
-#include "llvm/Support/PassNameParser.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PassNameParser.h"
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/Valgrind.h"
-#include "llvm/LinkAllVMCore.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 
 //Enable this macro to debug bugpoint itself.
diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp
index 1c3a01b1e8..1e96762d10 100644
--- a/tools/gold/gold-plugin.cpp
+++ b/tools/gold/gold-plugin.cpp
@@ -14,17 +14,14 @@
 
 #include "llvm/Config/config.h" // plugin-api.h requires HAVE_STDINT_H
 #include "plugin-api.h"
-
 #include "llvm-c/lto.h"
-
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/Support/system_error.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/Errno.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
-
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/system_error.h"
 #include <cerrno>
 #include <cstdlib>
 #include <cstring>
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index bd2fa4c1cd..0f03483fa0 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -14,32 +14,32 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/LLVMContext.h"
-#include "llvm/DataLayout.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Pass.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/Support/DataStream.h"  // @LOCALMOD
-#include "llvm/Support/IRReader.h"
 #include "llvm/CodeGen/CommandFlags.h"
 #include "llvm/CodeGen/IntrinsicLowering.h" // @LOCALMOD
 #include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
 #include "llvm/CodeGen/LinkAllCodegenComponents.h"
+#include "llvm/DataLayout.h"
 #include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
 #if !defined(__native_client__)
 #include "llvm/Support/PluginLoader.h"
 #endif
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/Host.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include <memory>
@@ -80,6 +80,10 @@ InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
 static cl::opt<std::string>
 OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"));
 
+static cl::opt<unsigned>
+TimeCompilations("time-compilations", cl::Hidden, cl::init(1u),
+                 cl::value_desc("N"),
+                 cl::desc("Repeat compilation N times for timing"));
 // @LOCALMOD-BEGIN
 static cl::opt<std::string>
 MetadataTextFilename("metadata-text", cl::desc("Metadata as text, out filename"),
@@ -126,6 +130,8 @@ DisableSimplifyLibCalls("disable-simplify-libcalls",
                         cl::desc("Disable simplify-libcalls"),
                         cl::init(false));
 
+static int compileModule(char**, LLVMContext&);
+
 // GetFileNameRoot - Helper function to get the basename of a filename.
 static inline std::string
 GetFileNameRoot(const std::string &InputFilename) {
@@ -210,11 +216,12 @@ void RecordMetadataForSrpc(const Module &mod) {
   bool is_shared = (mod.getOutputFormat() == Module::SharedOutputFormat);
   std::string soname = mod.getSOName();
   NaClRecordObjectInformation(is_shared, soname);
+  /* TEMPORARY LOCALMOD until we put back lib_iterator
   for (Module::lib_iterator L = mod.lib_begin(),
                             E = mod.lib_end();
        L != E; ++L) {
     NaClRecordSharedLibraryDependency(*L);
-  }
+    } */
 }
 #endif  // defined(__native_client__) && defined(NACL_SRPC)
 // @LOCALMOD-END
@@ -287,6 +294,15 @@ int llc_main(int argc, char **argv) {
 
   cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n");
 
+  // Compile the module TimeCompilations times to give better compile time
+  // metrics.
+  for (unsigned I = TimeCompilations; I; --I)
+    if (int RetVal = compileModule(argv, Context))
+      return RetVal;
+  return 0;
+}
+
+static int compileModule(char **argv, LLVMContext &Context) {
   // Load the module to be compiled...
   SMDiagnostic Err;
   std::auto_ptr<Module> M;
@@ -352,9 +368,10 @@ int llc_main(int argc, char **argv) {
   }
   // Also set PIC_ for dynamic executables:
   // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2351
+  /* TEMPORARY LOCALMOD until we put back lib_iterator
   if (mod->lib_size() > 0) {
     RelocModel = Reloc::PIC_;
-  }
+    } */
 #endif  // defined(__native_client__) && defined(NACL_SRPC)
   // @LOCALMOD-END
 
diff --git a/tools/lli/RemoteTarget.h b/tools/lli/RemoteTarget.h
index d05d3c6f45..b2a6d0ef1d 100644
--- a/tools/lli/RemoteTarget.h
+++ b/tools/lli/RemoteTarget.h
@@ -15,8 +15,8 @@
 #ifndef REMOTEPROCESS_H
 #define REMOTEPROCESS_H
 
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Memory.h"
 #include <stdlib.h>
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index fa4669dec6..5135d5041f 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -14,11 +14,9 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "lli"
+#include "llvm/LLVMContext.h"
 #include "RecordingMemoryManager.h"
 #include "RemoteTarget.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/CodeGen/LinkAllCodegenComponents.h"
@@ -28,33 +26,26 @@
 #include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/ExecutionEngine/JITMemoryManager.h"
 #include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Memory.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Format.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Type.h"
 #include <cerrno>
 
-#ifdef __linux__
-// These includes used by LLIMCJITMemoryManager::getPointerToNamedFunction()
-// for Glibc trickery. Look comments in this function for more information.
-#ifdef HAVE_SYS_STAT_H
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-#endif
-
 #ifdef __CYGWIN__
 #include <cygwin/version.h>
 #if defined(CYGWIN_VERSION_DLL_MAJOR) && CYGWIN_VERSION_DLL_MAJOR<1007
@@ -217,214 +208,6 @@ static void do_shutdown() {
 #endif
 }
 
-// Memory manager for MCJIT
-class LLIMCJITMemoryManager : public JITMemoryManager {
-public:
-  SmallVector<sys::MemoryBlock, 16> AllocatedDataMem;
-  SmallVector<sys::MemoryBlock, 16> AllocatedCodeMem;
-  SmallVector<sys::MemoryBlock, 16> FreeCodeMem;
-
-  LLIMCJITMemoryManager() { }
-  ~LLIMCJITMemoryManager();
-
-  virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
-                                       unsigned SectionID);
-
-  virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
-                                       unsigned SectionID, bool IsReadOnly);
-
-  virtual void *getPointerToNamedFunction(const std::string &Name,
-                                          bool AbortOnFailure = true);
-
-  virtual bool applyPermissions(std::string *ErrMsg) { return false; }
-
-  // Invalidate instruction cache for code sections. Some platforms with
-  // separate data cache and instruction cache require explicit cache flush,
-  // otherwise JIT code manipulations (like resolved relocations) will get to
-  // the data cache but not to the instruction cache.
-  virtual void invalidateInstructionCache();
-
-  // The RTDyldMemoryManager doesn't use the following functions, so we don't
-  // need implement them.
-  virtual void setMemoryWritable() {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual void setMemoryExecutable() {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual void setPoisonMemory(bool poison) {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual void AllocateGOT() {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual uint8_t *getGOTBase() const {
-    llvm_unreachable("Unexpected call!");
-    return 0;
-  }
-  virtual uint8_t *startFunctionBody(const Function *F,
-                                     uintptr_t &ActualSize){
-    llvm_unreachable("Unexpected call!");
-    return 0;
-  }
-  virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
-                                unsigned Alignment) {
-    llvm_unreachable("Unexpected call!");
-    return 0;
-  }
-  virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
-                               uint8_t *FunctionEnd) {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
-    llvm_unreachable("Unexpected call!");
-    return 0;
-  }
-  virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
-    llvm_unreachable("Unexpected call!");
-    return 0;
-  }
-  virtual void deallocateFunctionBody(void *Body) {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual uint8_t* startExceptionTable(const Function* F,
-                                       uintptr_t &ActualSize) {
-    llvm_unreachable("Unexpected call!");
-    return 0;
-  }
-  virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
-                                 uint8_t *TableEnd, uint8_t* FrameRegister) {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual void deallocateExceptionTable(void *ET) {
-    llvm_unreachable("Unexpected call!");
-  }
-};
-
-uint8_t *LLIMCJITMemoryManager::allocateDataSection(uintptr_t Size,
-                                                    unsigned Alignment,
-                                                    unsigned SectionID,
-                                                    bool IsReadOnly) {
-  if (!Alignment)
-    Alignment = 16;
-  // Ensure that enough memory is requested to allow aligning.
-  size_t NumElementsAligned = 1 + (Size + Alignment - 1)/Alignment;
-  uint8_t *Addr = (uint8_t*)calloc(NumElementsAligned, Alignment);
-
-  // Honour the alignment requirement.
-  uint8_t *AlignedAddr = (uint8_t*)RoundUpToAlignment((uint64_t)Addr, Alignment);
-
-  // Store the original address from calloc so we can free it later.
-  AllocatedDataMem.push_back(sys::MemoryBlock(Addr, NumElementsAligned*Alignment));
-  return AlignedAddr;
-}
-
-uint8_t *LLIMCJITMemoryManager::allocateCodeSection(uintptr_t Size,
-                                                    unsigned Alignment,
-                                                    unsigned SectionID) {
-  if (!Alignment)
-    Alignment = 16;
-  unsigned NeedAllocate = Alignment * ((Size + Alignment - 1)/Alignment + 1);
-  uintptr_t Addr = 0;
-  // Look in the list of free code memory regions and use a block there if one
-  // is available.
-  for (int i = 0, e = FreeCodeMem.size(); i != e; ++i) {
-    sys::MemoryBlock &MB = FreeCodeMem[i];
-    if (MB.size() >= NeedAllocate) {
-      Addr = (uintptr_t)MB.base();
-      uintptr_t EndOfBlock = Addr + MB.size();
-      // Align the address.
-      Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
-      // Store cutted free memory block.
-      FreeCodeMem[i] = sys::MemoryBlock((void*)(Addr + Size),
-                                        EndOfBlock - Addr - Size);
-      return (uint8_t*)Addr;
-    }
-  }
-
-  // No pre-allocated free block was large enough. Allocate a new memory region.
-  sys::MemoryBlock MB = sys::Memory::AllocateRWX(NeedAllocate, 0, 0);
-
-  AllocatedCodeMem.push_back(MB);
-  Addr = (uintptr_t)MB.base();
-  uintptr_t EndOfBlock = Addr + MB.size();
-  // Align the address.
-  Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
-  // The AllocateRWX may allocate much more memory than we need. In this case,
-  // we store the unused memory as a free memory block.
-  unsigned FreeSize = EndOfBlock-Addr-Size;
-  if (FreeSize > 16)
-    FreeCodeMem.push_back(sys::MemoryBlock((void*)(Addr + Size), FreeSize));
-
-  // Return aligned address
-  return (uint8_t*)Addr;
-}
-
-void LLIMCJITMemoryManager::invalidateInstructionCache() {
-  for (int i = 0, e = AllocatedCodeMem.size(); i != e; ++i)
-    sys::Memory::InvalidateInstructionCache(AllocatedCodeMem[i].base(),
-                                            AllocatedCodeMem[i].size());
-}
-
-static int jit_noop() {
-  return 0;
-}
-
-void *LLIMCJITMemoryManager::getPointerToNamedFunction(const std::string &Name,
-                                                       bool AbortOnFailure) {
-#if defined(__linux__)
-  //===--------------------------------------------------------------------===//
-  // Function stubs that are invoked instead of certain library calls
-  //
-  // Force the following functions to be linked in to anything that uses the
-  // JIT. This is a hack designed to work around the all-too-clever Glibc
-  // strategy of making these functions work differently when inlined vs. when
-  // not inlined, and hiding their real definitions in a separate archive file
-  // that the dynamic linker can't see. For more info, search for
-  // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
-  if (Name == "stat") return (void*)(intptr_t)&stat;
-  if (Name == "fstat") return (void*)(intptr_t)&fstat;
-  if (Name == "lstat") return (void*)(intptr_t)&lstat;
-  if (Name == "stat64") return (void*)(intptr_t)&stat64;
-  if (Name == "fstat64") return (void*)(intptr_t)&fstat64;
-  if (Name == "lstat64") return (void*)(intptr_t)&lstat64;
-  if (Name == "atexit") return (void*)(intptr_t)&atexit;
-  if (Name == "mknod") return (void*)(intptr_t)&mknod;
-#endif // __linux__
-
-  // We should not invoke parent's ctors/dtors from generated main()!
-  // On Mingw and Cygwin, the symbol __main is resolved to
-  // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
-  // (and register wrong callee's dtors with atexit(3)).
-  // We expect ExecutionEngine::runStaticConstructorsDestructors()
-  // is called before ExecutionEngine::runFunctionAsMain() is called.
-  if (Name == "__main") return (void*)(intptr_t)&jit_noop;
-
-  const char *NameStr = Name.c_str();
-  void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
-  if (Ptr) return Ptr;
-
-  // If it wasn't found and if it starts with an underscore ('_') character,
-  // try again without the underscore.
-  if (NameStr[0] == '_') {
-    Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
-    if (Ptr) return Ptr;
-  }
-
-  if (AbortOnFailure)
-    report_fatal_error("Program used external function '" + Name +
-                      "' which could not be resolved!");
-  return 0;
-}
-
-LLIMCJITMemoryManager::~LLIMCJITMemoryManager() {
-  for (unsigned i = 0, e = AllocatedCodeMem.size(); i != e; ++i)
-    sys::Memory::ReleaseRWX(AllocatedCodeMem[i]);
-  for (unsigned i = 0, e = AllocatedDataMem.size(); i != e; ++i)
-    free(AllocatedDataMem[i].base());
-}
-
-
 void layoutRemoteTargetMemory(RemoteTarget *T, RecordingMemoryManager *JMM) {
   // Lay out our sections in order, with all the code sections first, then
   // all the data sections.
@@ -564,7 +347,7 @@ int main(int argc, char **argv, char * const *envp) {
     if (RemoteMCJIT)
       JMM = new RecordingMemoryManager();
     else
-      JMM = new LLIMCJITMemoryManager();
+      JMM = new SectionMemoryManager();
     builder.setJITMemoryManager(JMM);
   } else {
     if (RemoteMCJIT) {
@@ -665,8 +448,13 @@ int main(int argc, char **argv, char * const *envp) {
   // MCJIT itself. FIXME.
   //
   // Run static constructors.
-  if (!RemoteMCJIT)
-    EE->runStaticConstructorsDestructors(false);
+  if (!RemoteMCJIT) {
+      if (UseMCJIT && !ForceInterpreter) {
+        // Give MCJIT a chance to apply relocations and set page permissions.
+        EE->finalizeObject();
+      }
+      EE->runStaticConstructorsDestructors(false);
+  }
 
   if (NoLazyCompilation) {
     for (Module::iterator I = Mod->begin(), E = Mod->end(); I != E; ++I) {
@@ -713,7 +501,7 @@ int main(int argc, char **argv, char * const *envp) {
     (void)EE->getPointerToFunction(EntryFn);
     // Clear instruction cache before code will be executed.
     if (JMM)
-      static_cast<LLIMCJITMemoryManager*>(JMM)->invalidateInstructionCache();
+      static_cast<SectionMemoryManager*>(JMM)->invalidateInstructionCache();
 
     // Run main.
     Result = EE->runFunctionAsMain(EntryFn, InputArgv, envp);
diff --git a/tools/llvm-ar/llvm-ar.cpp b/tools/llvm-ar/llvm-ar.cpp
index a8a5013a9a..c66c89f6ed 100644
--- a/tools/llvm-ar/llvm-ar.cpp
+++ b/tools/llvm-ar/llvm-ar.cpp
@@ -13,19 +13,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
 #include "llvm/Bitcode/Archive.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cstdlib>
-#include <memory>
 #include <fstream>
+#include <memory>
 using namespace llvm;
 
 // Option for compatibility with AIX, not used but must allow it to be present.
diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp
index 1def9a4a2d..2aa57d6760 100644
--- a/tools/llvm-as/llvm-as.cpp
+++ b/tools/llvm-as/llvm-as.cpp
@@ -16,17 +16,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Assembly/Parser.h"
 #include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/Parser.h"
 #include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/Signals.h"
 #include <memory>
 using namespace llvm;
 
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 8109ca4d5b..46b687e1c2 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -37,12 +37,11 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
-
-#include <map>
 #include <algorithm>
+#include <map>
 using namespace llvm;
 
 static cl::opt<std::string>
@@ -150,7 +149,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
     case bitc::MODULE_CODE_DATALAYOUT:  return "DATALAYOUT";
     case bitc::MODULE_CODE_ASM:         return "ASM";
     case bitc::MODULE_CODE_SECTIONNAME: return "SECTIONNAME";
-    case bitc::MODULE_CODE_DEPLIB:      return "DEPLIB";
+    case bitc::MODULE_CODE_DEPLIB:      return "DEPLIB"; // FIXME: Remove in 4.0
     case bitc::MODULE_CODE_GLOBALVAR:   return "GLOBALVAR";
     case bitc::MODULE_CODE_FUNCTION:    return "FUNCTION";
     case bitc::MODULE_CODE_ALIAS:       return "ALIAS";
diff --git a/tools/llvm-diff/DiffConsumer.cpp b/tools/llvm-diff/DiffConsumer.cpp
index 91c1699bee..592ae702fa 100644
--- a/tools/llvm-diff/DiffConsumer.cpp
+++ b/tools/llvm-diff/DiffConsumer.cpp
@@ -12,9 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "DiffConsumer.h"
-
-#include "llvm/Module.h"
 #include "llvm/Instructions.h"
+#include "llvm/Module.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
diff --git a/tools/llvm-diff/DiffConsumer.h b/tools/llvm-diff/DiffConsumer.h
index 98e369ba97..6c2209f27e 100644
--- a/tools/llvm-diff/DiffConsumer.h
+++ b/tools/llvm-diff/DiffConsumer.h
@@ -15,12 +15,11 @@
 #define _LLVM_DIFFCONSUMER_H_
 
 #include "DiffLog.h"
-
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
   class Module;
diff --git a/tools/llvm-diff/DiffLog.cpp b/tools/llvm-diff/DiffLog.cpp
index 9cc0c889fd..5f06a1614b 100644
--- a/tools/llvm-diff/DiffLog.cpp
+++ b/tools/llvm-diff/DiffLog.cpp
@@ -13,10 +13,9 @@
 
 #include "DiffLog.h"
 #include "DiffConsumer.h"
-
-#include "llvm/Instructions.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Instructions.h"
 
 using namespace llvm;
 
diff --git a/tools/llvm-diff/DifferenceEngine.cpp b/tools/llvm-diff/DifferenceEngine.cpp
index 0c1e30c987..9cf1f12715 100644
--- a/tools/llvm-diff/DifferenceEngine.cpp
+++ b/tools/llvm-diff/DifferenceEngine.cpp
@@ -13,22 +13,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "DifferenceEngine.h"
-
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
-#include "llvm/Support/CallSite.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CFG.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/type_traits.h"
-
 #include <utility>
 
 using namespace llvm;
diff --git a/tools/llvm-diff/DifferenceEngine.h b/tools/llvm-diff/DifferenceEngine.h
index 0246d8ff5d..73bf6eb6ea 100644
--- a/tools/llvm-diff/DifferenceEngine.h
+++ b/tools/llvm-diff/DifferenceEngine.h
@@ -15,11 +15,10 @@
 #ifndef _LLVM_DIFFERENCE_ENGINE_H_
 #define _LLVM_DIFFERENCE_ENGINE_H_
 
+#include "DiffConsumer.h"
+#include "DiffLog.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
-#include "DiffLog.h"
-#include "DiffConsumer.h"
-
 #include <utility>
 
 namespace llvm {
diff --git a/tools/llvm-diff/llvm-diff.cpp b/tools/llvm-diff/llvm-diff.cpp
index 45957b3f8c..f381dae630 100644
--- a/tools/llvm-diff/llvm-diff.cpp
+++ b/tools/llvm-diff/llvm-diff.cpp
@@ -13,19 +13,17 @@
 
 #include "DiffLog.h"
 #include "DifferenceEngine.h"
-
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/IRReader.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SourceMgr.h"
-
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Type.h"
 #include <string>
 #include <utility>
 
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 75ceda61ad..6b0a47960b 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -17,21 +17,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/LLVMContext.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
+#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/DebugInfo.h"
-#include "llvm/Module.h"
-#include "llvm/Type.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Assembly/AssemblyAnnotationWriter.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DataStream.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/system_error.h"
+#include "llvm/Type.h"
 using namespace llvm;
 
 static cl::opt<std::string>
diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 2229a3aa98..34302fa745 100644
--- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -12,11 +12,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/DebugInfo/DIContext.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Object/RelocVisitor.h"
-#include "llvm/DebugInfo/DIContext.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index 0e280c1780..a070370662 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -13,23 +13,23 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Assembly/PrintModulePass.h"
 #include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Transforms/IPO.h"
 #include "llvm/DataLayout.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h" // @LOCALMOD
 #include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/Signals.h"
 #include "llvm/Support/Regex.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Transforms/IPO.h"
 #include <memory>
 using namespace llvm;
 
@@ -323,9 +323,7 @@ int main(int argc, char **argv) {
   else if (Force || !CheckBitcodeOutputToConsole(Out.os(), true))
     Passes.add(createBitcodeWriterPass(Out.os()));
 
-  Passes.doInitialization();
   Passes.run(*M.get());
-  Passes.doFinalization();
 
   // Declare success.
   Out.keep();
diff --git a/tools/llvm-jitlistener/llvm-jitlistener.cpp b/tools/llvm-jitlistener/llvm-jitlistener.cpp
index 2b05e66e98..0be525a13e 100644
--- a/tools/llvm-jitlistener/llvm-jitlistener.cpp
+++ b/tools/llvm-jitlistener/llvm-jitlistener.cpp
@@ -13,14 +13,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/LLVMContext.h"
+#include "../../lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/Triple.h"
-#include "../../lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h"
 #include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/ExecutionEngine/JITMemoryManager.h"
 #include "llvm/ExecutionEngine/MCJIT.h"
 #include "llvm/ExecutionEngine/ObjectImage.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Host.h"
diff --git a/tools/llvm-link/llvm-link.cpp b/tools/llvm-link/llvm-link.cpp
index 378a83368f..d02fca2ece 100644
--- a/tools/llvm-link/llvm-link.cpp
+++ b/tools/llvm-link/llvm-link.cpp
@@ -13,18 +13,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Linker.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/SystemUtils.h"
-#include "llvm/Support/IRReader.h"
 #include "llvm/Support/Signals.h"
-#include "llvm/Support/Path.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/ToolOutputFile.h"
 #include <memory>
 using namespace llvm;
 
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index 5f2fdb8071..54cf4e2edc 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -17,12 +17,12 @@
 #include "../../lib/MC/MCDisassembler/EDInst.h"
 #include "../../lib/MC/MCDisassembler/EDOperand.h"
 #include "../../lib/MC/MCDisassembler/EDToken.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/MC/MCDisassembler.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/SourceMgr.h"
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 8329a41f25..15cacfabeb 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -12,36 +12,36 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/MC/MCParser/AsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "Disassembler.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInstPrinter.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCTargetAsmParser.h"
 #include "llvm/MC/SubtargetFeature.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Host.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/Support/Host.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/system_error.h"
-#include "Disassembler.h"
 using namespace llvm;
 
 static cl::opt<std::string>
@@ -162,7 +162,8 @@ enum ActionType {
   AC_Assemble,
   AC_Disassemble,
   AC_EDisassemble,
-  AC_MDisassemble
+  AC_MDisassemble,
+  AC_HDisassemble
 };
 
 static cl::opt<ActionType>
@@ -178,6 +179,9 @@ Action(cl::desc("Action to perform:"),
                              "Enhanced disassembly of strings of hex bytes"),
                   clEnumValN(AC_MDisassemble, "mdis",
                              "Marked up disassembly of strings of hex bytes"),
+                  clEnumValN(AC_HDisassemble, "hdis",
+                             "Disassemble strings of hex bytes printing "
+                             "immediates as hex"),
                   clEnumValEnd));
 
 static const Target *GetTarget(const char *ProgName) {
@@ -437,6 +441,7 @@ int main(int argc, char **argv) {
   }
 
   int Res = 1;
+  bool disassemble = false;
   switch (Action) {
   case AC_AsLex:
     Res = AsLexInput(SrcMgr, *MAI, Out.get());
@@ -446,15 +451,22 @@ int main(int argc, char **argv) {
     break;
   case AC_MDisassemble:
     IP->setUseMarkup(1);
-    // Fall through to do disassembly.
+    disassemble = true;
+    break;
+  case AC_HDisassemble:
+    IP->setPrintImmHex(1);
+    disassemble = true;
+    break;
   case AC_Disassemble:
-    Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str,
-                                    *Buffer, SrcMgr, Out->os());
+    disassemble = true;
     break;
   case AC_EDisassemble:
     Res =  Disassembler::disassembleEnhanced(TripleName, *Buffer, SrcMgr, Out->os());
     break;
   }
+  if (disassemble)
+    Res = Disassembler::disassemble(*TheTarget, TripleName, *STI, *Str,
+                                    *Buffer, SrcMgr, Out->os());
 
   // Keep output if no errors.
   if (Res == 0) Out->keep();
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index 27efd74264..83cf1d3815 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -17,20 +17,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/Bitcode/Archive.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Module.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Program.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Signals.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
 #include <algorithm>
 #include <cctype>
diff --git a/tools/llvm-objdump/CMakeLists.txt b/tools/llvm-objdump/CMakeLists.txt
index f3b2e1fe41..5001435e83 100644
--- a/tools/llvm-objdump/CMakeLists.txt
+++ b/tools/llvm-objdump/CMakeLists.txt
@@ -9,6 +9,7 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_tool(llvm-objdump
   llvm-objdump.cpp
+  COFFDump.cpp
   MachODump.cpp
   MCFunction.cpp
   )
diff --git a/tools/llvm-objdump/COFFDump.cpp b/tools/llvm-objdump/COFFDump.cpp
new file mode 100644
index 0000000000..30faecbb19
--- /dev/null
+++ b/tools/llvm-objdump/COFFDump.cpp
@@ -0,0 +1,355 @@
+//===-- COFFDump.cpp - COFF-specific dumper ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the COFF-specific dumper for llvm-objdump.
+/// It outputs the Win64 EH data structures as plain text.
+/// The encoding of the unwind codes is decribed in MSDN:
+/// http://msdn.microsoft.com/en-us/library/ck9asaa9.aspx
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm-objdump.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Win64EH.h"
+#include <algorithm>
+#include <cstring>
+
+using namespace llvm;
+using namespace object;
+using namespace llvm::Win64EH;
+
+// Returns the name of the unwind code.
+static StringRef getUnwindCodeTypeName(uint8_t Code) {
+  switch(Code) {
+  default: llvm_unreachable("Invalid unwind code");
+  case UOP_PushNonVol: return "UOP_PushNonVol";
+  case UOP_AllocLarge: return "UOP_AllocLarge";
+  case UOP_AllocSmall: return "UOP_AllocSmall";
+  case UOP_SetFPReg: return "UOP_SetFPReg";
+  case UOP_SaveNonVol: return "UOP_SaveNonVol";
+  case UOP_SaveNonVolBig: return "UOP_SaveNonVolBig";
+  case UOP_SaveXMM128: return "UOP_SaveXMM128";
+  case UOP_SaveXMM128Big: return "UOP_SaveXMM128Big";
+  case UOP_PushMachFrame: return "UOP_PushMachFrame";
+  }
+}
+
+// Returns the name of a referenced register.
+static StringRef getUnwindRegisterName(uint8_t Reg) {
+  switch(Reg) {
+  default: llvm_unreachable("Invalid register");
+  case 0: return "RAX";
+  case 1: return "RCX";
+  case 2: return "RDX";
+  case 3: return "RBX";
+  case 4: return "RSP";
+  case 5: return "RBP";
+  case 6: return "RSI";
+  case 7: return "RDI";
+  case 8: return "R8";
+  case 9: return "R9";
+  case 10: return "R10";
+  case 11: return "R11";
+  case 12: return "R12";
+  case 13: return "R13";
+  case 14: return "R14";
+  case 15: return "R15";
+  }
+}
+
+// Calculates the number of array slots required for the unwind code.
+static unsigned getNumUsedSlots(const UnwindCode &UnwindCode) {
+  switch (UnwindCode.getUnwindOp()) {
+  default: llvm_unreachable("Invalid unwind code");
+  case UOP_PushNonVol:
+  case UOP_AllocSmall:
+  case UOP_SetFPReg:
+  case UOP_PushMachFrame:
+    return 1;
+  case UOP_SaveNonVol:
+  case UOP_SaveXMM128:
+    return 2;
+  case UOP_SaveNonVolBig:
+  case UOP_SaveXMM128Big:
+    return 3;
+  case UOP_AllocLarge:
+    return (UnwindCode.getOpInfo() == 0) ? 2 : 3;
+  }
+}
+
+// Prints one unwind code. Because an unwind code can occupy up to 3 slots in
+// the unwind codes array, this function requires that the correct number of
+// slots is provided.
+static void printUnwindCode(ArrayRef<UnwindCode> UCs) {
+  assert(UCs.size() >= getNumUsedSlots(UCs[0]));
+  outs() <<  format("    0x%02x: ", unsigned(UCs[0].u.CodeOffset))
+         << getUnwindCodeTypeName(UCs[0].getUnwindOp());
+  switch (UCs[0].getUnwindOp()) {
+  case UOP_PushNonVol:
+    outs() << " " << getUnwindRegisterName(UCs[0].getOpInfo());
+    break;
+  case UOP_AllocLarge:
+    if (UCs[0].getOpInfo() == 0) {
+      outs() << " " << UCs[1].FrameOffset;
+    } else {
+      outs() << " " << UCs[1].FrameOffset
+                       + (static_cast<uint32_t>(UCs[2].FrameOffset) << 16);
+    }
+    break;
+  case UOP_AllocSmall:
+    outs() << " " << ((UCs[0].getOpInfo() + 1) * 8);
+    break;
+  case UOP_SetFPReg:
+    outs() << " ";
+    break;
+  case UOP_SaveNonVol:
+    outs() << " " << getUnwindRegisterName(UCs[0].getOpInfo())
+           << format(" [0x%04x]", 8 * UCs[1].FrameOffset);
+    break;
+  case UOP_SaveNonVolBig:
+    outs() << " " << getUnwindRegisterName(UCs[0].getOpInfo())
+           << format(" [0x%08x]", UCs[1].FrameOffset
+                    + (static_cast<uint32_t>(UCs[2].FrameOffset) << 16));
+    break;
+  case UOP_SaveXMM128:
+    outs() << " XMM" << static_cast<uint32_t>(UCs[0].getOpInfo())
+           << format(" [0x%04x]", 16 * UCs[1].FrameOffset);
+    break;
+  case UOP_SaveXMM128Big:
+    outs() << " XMM" << UCs[0].getOpInfo()
+           << format(" [0x%08x]", UCs[1].FrameOffset
+                           + (static_cast<uint32_t>(UCs[2].FrameOffset) << 16));
+    break;
+  case UOP_PushMachFrame:
+    outs() << " " << (UCs[0].getOpInfo() ? "w/o" : "w")
+           << " error code";
+    break;
+  }
+  outs() << "\n";
+}
+
+static void printAllUnwindCodes(ArrayRef<UnwindCode> UCs) {
+  for (const UnwindCode *I = UCs.begin(), *E = UCs.end(); I < E; ) {
+    unsigned UsedSlots = getNumUsedSlots(*I);
+    if (UsedSlots > UCs.size()) {
+      outs() << "Unwind data corrupted: Encountered unwind op "
+             << getUnwindCodeTypeName((*I).getUnwindOp())
+             << " which requires " << UsedSlots
+             << " slots, but only " << UCs.size()
+             << " remaining in buffer";
+      return ;
+    }
+    printUnwindCode(ArrayRef<UnwindCode>(I, E));
+    I += UsedSlots;
+  }
+}
+
+// Given a symbol sym this functions returns the address and section of it.
+static error_code resolveSectionAndAddress(const COFFObjectFile *Obj,
+                                           const SymbolRef &Sym,
+                                           const coff_section *&ResolvedSection,
+                                           uint64_t &ResolvedAddr) {
+  if (error_code ec = Sym.getAddress(ResolvedAddr)) return ec;
+  section_iterator iter(Obj->begin_sections());
+  if (error_code ec = Sym.getSection(iter)) return ec;
+  ResolvedSection = Obj->getCOFFSection(iter);
+  return object_error::success;
+}
+
+// Given a vector of relocations for a section and an offset into this section
+// the function returns the symbol used for the relocation at the offset.
+static error_code resolveSymbol(const std::vector<RelocationRef> &Rels,
+                                uint64_t Offset, SymbolRef &Sym) {
+  for (std::vector<RelocationRef>::const_iterator I = Rels.begin(),
+                                                  E = Rels.end();
+                                                  I != E; ++I) {
+    uint64_t Ofs;
+    if (error_code ec = I->getOffset(Ofs)) return ec;
+    if (Ofs == Offset) {
+      if (error_code ec = I->getSymbol(Sym)) return ec;
+      break;
+    }
+  }
+  return object_error::success;
+}
+
+// Given a vector of relocations for a section and an offset into this section
+// the function resolves the symbol used for the relocation at the offset and
+// returns the section content and the address inside the content pointed to
+// by the symbol.
+static error_code getSectionContents(const COFFObjectFile *Obj,
+                                     const std::vector<RelocationRef> &Rels,
+                                     uint64_t Offset,
+                                     ArrayRef<uint8_t> &Contents,
+                                     uint64_t &Addr) {
+  SymbolRef Sym;
+  if (error_code ec = resolveSymbol(Rels, Offset, Sym)) return ec;
+  const coff_section *Section;
+  if (error_code ec = resolveSectionAndAddress(Obj, Sym, Section, Addr))
+    return ec;
+  if (error_code ec = Obj->getSectionContents(Section, Contents)) return ec;
+  return object_error::success;
+}
+
+// Given a vector of relocations for a section and an offset into this section
+// the function returns the name of the symbol used for the relocation at the
+// offset.
+static error_code resolveSymbolName(const std::vector<RelocationRef> &Rels,
+                                    uint64_t Offset, StringRef &Name) {
+  SymbolRef Sym;
+  if (error_code ec = resolveSymbol(Rels, Offset, Sym)) return ec;
+  if (error_code ec = Sym.getName(Name)) return ec;
+  return object_error::success;
+}
+
+static void printCOFFSymbolAddress(llvm::raw_ostream &Out,
+                                   const std::vector<RelocationRef> &Rels,
+                                   uint64_t Offset, uint32_t Disp) {
+  StringRef Sym;
+  if (error_code ec = resolveSymbolName(Rels, Offset, Sym)) {
+    error(ec);
+    return ;
+  }
+  Out << Sym;
+  if (Disp > 0)
+    Out << format(" + 0x%04x", Disp);
+}
+
+void llvm::printCOFFUnwindInfo(const COFFObjectFile *Obj) {
+  const coff_file_header *Header;
+  if (error(Obj->getHeader(Header))) return;
+
+  if (Header->Machine != COFF::IMAGE_FILE_MACHINE_AMD64) {
+    errs() << "Unsupported image machine type "
+              "(currently only AMD64 is supported).\n";
+    return;
+  }
+
+  const coff_section *Pdata = 0;
+
+  error_code ec;
+  for (section_iterator SI = Obj->begin_sections(),
+                        SE = Obj->end_sections();
+                        SI != SE; SI.increment(ec)) {
+    if (error(ec)) return;
+
+    StringRef Name;
+    if (error(SI->getName(Name))) continue;
+
+    if (Name != ".pdata") continue;
+
+    Pdata = Obj->getCOFFSection(SI);
+    std::vector<RelocationRef> Rels;
+    for (relocation_iterator RI = SI->begin_relocations(),
+                             RE = SI->end_relocations();
+                             RI != RE; RI.increment(ec)) {
+      if (error(ec)) break;
+      Rels.push_back(*RI);
+    }
+
+    // Sort relocations by address.
+    std::sort(Rels.begin(), Rels.end(), RelocAddressLess);
+
+    ArrayRef<uint8_t> Contents;
+    if (error(Obj->getSectionContents(Pdata, Contents))) continue;
+    if (Contents.empty()) continue;
+
+    ArrayRef<RuntimeFunction> RFs(
+                  reinterpret_cast<const RuntimeFunction *>(Contents.data()),
+                                  Contents.size() / sizeof(RuntimeFunction));
+    for (const RuntimeFunction *I = RFs.begin(), *E = RFs.end(); I < E; ++I) {
+      const uint64_t SectionOffset = std::distance(RFs.begin(), I)
+                                     * sizeof(RuntimeFunction);
+
+      outs() << "Function Table:\n";
+
+      outs() << "  Start Address: ";
+      printCOFFSymbolAddress(outs(), Rels, SectionOffset +
+                             /*offsetof(RuntimeFunction, StartAddress)*/ 0,
+                             I->StartAddress);
+      outs() << "\n";
+
+      outs() << "  End Address: ";
+      printCOFFSymbolAddress(outs(), Rels, SectionOffset +
+                             /*offsetof(RuntimeFunction, EndAddress)*/ 4,
+                             I->EndAddress);
+      outs() << "\n";
+
+      outs() << "  Unwind Info Address: ";
+      printCOFFSymbolAddress(outs(), Rels, SectionOffset +
+                             /*offsetof(RuntimeFunction, UnwindInfoOffset)*/ 8,
+                             I->UnwindInfoOffset);
+      outs() << "\n";
+
+      ArrayRef<uint8_t> XContents;
+      uint64_t UnwindInfoOffset = 0;
+      if (error(getSectionContents(Obj, Rels, SectionOffset +
+                              /*offsetof(RuntimeFunction, UnwindInfoOffset)*/ 8,
+                                   XContents, UnwindInfoOffset))) continue;
+      if (XContents.empty()) continue;
+
+      UnwindInfoOffset += I->UnwindInfoOffset;
+      if (UnwindInfoOffset > XContents.size()) continue;
+
+      const Win64EH::UnwindInfo *UI =
+                            reinterpret_cast<const Win64EH::UnwindInfo *>
+                              (XContents.data() + UnwindInfoOffset);
+
+      // The casts to int are required in order to output the value as number.
+      // Without the casts the value would be interpreted as char data (which
+      // results in garbage output).
+      outs() << "  Version: " << static_cast<int>(UI->getVersion()) << "\n";
+      outs() << "  Flags: " << static_cast<int>(UI->getFlags());
+      if (UI->getFlags()) {
+          if (UI->getFlags() & UNW_ExceptionHandler)
+            outs() << " UNW_ExceptionHandler";
+          if (UI->getFlags() & UNW_TerminateHandler)
+            outs() << " UNW_TerminateHandler";
+          if (UI->getFlags() & UNW_ChainInfo)
+            outs() << " UNW_ChainInfo";
+      }
+      outs() << "\n";
+      outs() << "  Size of prolog: "
+             << static_cast<int>(UI->PrologSize) << "\n";
+      outs() << "  Number of Codes: "
+             << static_cast<int>(UI->NumCodes) << "\n";
+      // Maybe this should move to output of UOP_SetFPReg?
+      if (UI->getFrameRegister()) {
+        outs() << "  Frame register: "
+                << getUnwindRegisterName(UI->getFrameRegister())
+                << "\n";
+        outs() << "  Frame offset: "
+                << 16 * UI->getFrameOffset()
+                << "\n";
+      } else {
+        outs() << "  No frame pointer used\n";
+      }
+      if (UI->getFlags() & (UNW_ExceptionHandler | UNW_TerminateHandler)) {
+        // FIXME: Output exception handler data
+      } else if (UI->getFlags() & UNW_ChainInfo) {
+        // FIXME: Output chained unwind info
+      }
+
+      if (UI->NumCodes)
+        outs() << "  Unwind Codes:\n";
+
+      printAllUnwindCodes(ArrayRef<UnwindCode>(&UI->UnwindCodes[0],
+                          UI->NumCodes));
+
+      outs() << "\n\n";
+      outs().flush();
+    }
+  }
+}
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index 46e71ceb4d..3a350382ae 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -13,11 +13,9 @@
 
 #include "llvm-objdump.h"
 #include "MCFunction.h"
-#include "llvm/Support/MachO.h"
-#include "llvm/Object/MachO.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/DebugInfo/DIContext.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCDisassembler.h"
@@ -28,10 +26,12 @@
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/MachO.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/MachO.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index ddfcca3938..2838a2a2b3 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -15,13 +15,10 @@
 
 #include "llvm-objdump.h"
 #include "MCFunction.h"
-#include "llvm/Object/Archive.h"
-#include "llvm/Object/COFF.h"
-#include "llvm/Object/ObjectFile.h"
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCDisassembler.h"
 #include "llvm/MC/MCInst.h"
@@ -29,6 +26,9 @@
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -104,9 +104,16 @@ static cl::opt<bool>
 NoShowRawInsn("no-show-raw-insn", cl::desc("When disassembling instructions, "
                                            "do not print the instruction bytes."));
 
+static cl::opt<bool>
+UnwindInfo("unwind-info", cl::desc("Display unwind information"));
+
+static cl::alias
+UnwindInfoShort("u", cl::desc("Alias for --unwind-info"),
+                cl::aliasopt(UnwindInfo));
+
 static StringRef ToolName;
 
-static bool error(error_code ec) {
+bool llvm::error(error_code ec) {
   if (!ec) return false;
 
   outs() << ToolName << ": error reading file: " << ec.message() << ".\n";
@@ -165,7 +172,7 @@ void llvm::DumpBytes(StringRef bytes) {
   outs() << output;
 }
 
-static bool RelocAddressLess(RelocationRef a, RelocationRef b) {
+bool llvm::RelocAddressLess(RelocationRef a, RelocationRef b) {
   uint64_t a_addr, b_addr;
   if (error(a.getAddress(a_addr))) return false;
   if (error(b.getAddress(b_addr))) return false;
@@ -573,6 +580,19 @@ static void PrintSymbolTable(const ObjectFile *o) {
   }
 }
 
+static void PrintUnwindInfo(const ObjectFile *o) {
+  outs() << "Unwind info:\n\n";
+
+  if (const COFFObjectFile *coff = dyn_cast<COFFObjectFile>(o)) {
+    printCOFFUnwindInfo(coff);
+  } else {
+    // TODO: Extract DWARF dump tool to objdump.
+    errs() << "This operation is only currently supported "
+              "for COFF object files.\n";
+    return;
+  }
+}
+
 static void DumpObject(const ObjectFile *o) {
   outs() << '\n';
   outs() << o->getFileName()
@@ -588,6 +608,8 @@ static void DumpObject(const ObjectFile *o) {
     PrintSectionContents(o);
   if (SymbolTable)
     PrintSymbolTable(o);
+  if (UnwindInfo)
+    PrintUnwindInfo(o);
 }
 
 /// @brief Dump each object file in \a a;
@@ -666,7 +688,8 @@ int main(int argc, char **argv) {
       && !Relocations
       && !SectionHeaders
       && !SectionContents
-      && !SymbolTable) {
+      && !SymbolTable
+      && !UnwindInfo) {
     cl::PrintHelpMessage();
     return 2;
   }
diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h
index aa71b77c8a..9f5a8c3db9 100644
--- a/tools/llvm-objdump/llvm-objdump.h
+++ b/tools/llvm-objdump/llvm-objdump.h
@@ -17,12 +17,21 @@
 
 namespace llvm {
 
+namespace object {
+  class COFFObjectFile;
+  class RelocationRef;
+}
+class error_code;
+
 extern cl::opt<std::string> TripleName;
 extern cl::opt<std::string> ArchName;
 
 // Various helper functions.
+bool error(error_code ec);
+bool RelocAddressLess(object::RelocationRef a, object::RelocationRef b);
 void DumpBytes(StringRef bytes);
 void DisassembleInputMachO(StringRef Filename);
+void printCOFFUnwindInfo(const object::COFFObjectFile* o);
 
 class StringRefMemoryObject : public MemoryObject {
   virtual void anchor();
diff --git a/tools/llvm-prof/llvm-prof.cpp b/tools/llvm-prof/llvm-prof.cpp
index 940ac340e7..a2bfae6e8d 100644
--- a/tools/llvm-prof/llvm-prof.cpp
+++ b/tools/llvm-prof/llvm-prof.cpp
@@ -13,23 +13,23 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/InstrTypes.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
-#include "llvm/Assembly/AssemblyAnnotationWriter.h"
+#include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Analysis/Passes.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
 #include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Format.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
 #include <algorithm>
 #include <iomanip>
@@ -287,9 +287,7 @@ int main(int argc, char **argv) {
   PassManager PassMgr;
   PassMgr.add(createProfileLoaderPass(ProfileDataFile));
   PassMgr.add(new ProfileInfoPrinterPass(PIL));
-  PassMgr.doInitialization();
   PassMgr.run(*M);
-  PassMgr.doFinalization();
 
   return 0;
 }
diff --git a/tools/llvm-ranlib/llvm-ranlib.cpp b/tools/llvm-ranlib/llvm-ranlib.cpp
index d2f5f0fff9..7755628777 100644
--- a/tools/llvm-ranlib/llvm-ranlib.cpp
+++ b/tools/llvm-ranlib/llvm-ranlib.cpp
@@ -12,15 +12,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
 #include "llvm/Bitcode/Archive.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
 #include <memory>
 using namespace llvm;
 
diff --git a/tools/llvm-readobj/llvm-readobj.cpp b/tools/llvm-readobj/llvm-readobj.cpp
index 3be12899ae..40e3b8c933 100644
--- a/tools/llvm-readobj/llvm-readobj.cpp
+++ b/tools/llvm-readobj/llvm-readobj.cpp
@@ -16,16 +16,16 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Object/ELF.h"
-#include "llvm/Analysis/Verifier.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/Support/Format.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/Signals.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
 
 using namespace llvm;
 using namespace llvm::object;
diff --git a/tools/llvm-rtdyld/llvm-rtdyld.cpp b/tools/llvm-rtdyld/llvm-rtdyld.cpp
index e06d798cd5..ec63c9b56c 100644
--- a/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ExecutionEngine/ObjectBuffer.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
 #include "llvm/Object/MachOObject.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
diff --git a/tools/llvm-size/llvm-size.cpp b/tools/llvm-size/llvm-size.cpp
index 462da40afb..3de6605285 100644
--- a/tools/llvm-size/llvm-size.cpp
+++ b/tools/llvm-size/llvm-size.cpp
@@ -23,8 +23,8 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
 #include <algorithm>
 #include <string>
diff --git a/tools/llvm-stress/llvm-stress.cpp b/tools/llvm-stress/llvm-stress.cpp
index 72fdac87b4..ce798f7b2e 100644
--- a/tools/llvm-stress/llvm-stress.cpp
+++ b/tools/llvm-stress/llvm-stress.cpp
@@ -12,24 +12,24 @@
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/CallGraphSCCPass.h"
 #include "llvm/Constants.h"
 #include "llvm/Instruction.h"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Support/PassNameParser.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PassNameParser.h"
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include <algorithm>
 #include <memory>
-#include <sstream>
 #include <set>
+#include <sstream>
 #include <vector>
-#include <algorithm>
 using namespace llvm;
 
 static cl::opt<unsigned> SeedCL("seed",
@@ -713,9 +713,7 @@ int main(int argc, char **argv) {
   PassManager Passes;
   Passes.add(createVerifierPass());
   Passes.add(createPrintModulePass(&Out->os()));
-  Passes.doInitialization();
   Passes.run(*M.get());
-  Passes.doFinalization();
   Out->keep();
 
   return 0;
diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp
index f3335a3fc3..6a5eb4a662 100644
--- a/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -29,7 +29,6 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/raw_ostream.h"
-
 #include <cstdio>
 #include <cstring>
 #include <map>
diff --git a/tools/lto/CMakeLists.txt b/tools/lto/CMakeLists.txt
index a004bad189..5820b1415b 100644
--- a/tools/lto/CMakeLists.txt
+++ b/tools/lto/CMakeLists.txt
@@ -11,6 +11,8 @@ set(SOURCES
   LTOModule.cpp
   )
 
+set(LLVM_COMMON_DEPENDS intrinsics_gen)
+
 if( NOT WIN32 AND LLVM_ENABLE_PIC )
   set(bsl ${BUILD_SHARED_LIBS})
   set(BUILD_SHARED_LIBS ON)
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 2c7a09b966..da9b90eff1 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -14,37 +14,37 @@
 
 #include "LTOCodeGenerator.h"
 #include "LTOModule.h"
-#include "llvm/Constants.h"
-#include "llvm/DataLayout.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Linker.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/PassManager.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/Verifier.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/CodeGen/IntrinsicLowering.h" // @LOCALMOD
 #include "llvm/Config/config.h"
+#include "llvm/Constants.h"
+#include "llvm/DataLayout.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Linker.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Target/Mangler.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/Host.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/system_error.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
 using namespace llvm;
 
 static cl::opt<bool>
@@ -219,7 +219,8 @@ void LTOCodeGenerator::setMergedModuleSOName(const char *soname)
 void LTOCodeGenerator::addLibraryDep(const char *lib)
 {
   Module *mergedModule = _linker.getModule();
-  mergedModule->addLibrary(lib);
+  // TEMPORARY: make it compile until we add back addLibrary
+  //mergedModule->addLibrary(lib);
 }
 
 void LTOCodeGenerator::wrapSymbol(const char *sym)
@@ -490,9 +491,7 @@ void LTOCodeGenerator::applyScopeRestrictions() {
   passes.add(createInternalizePass(mustPreserveList));
 
   // apply scope restrictions
-  passes.doInitialization();
   passes.run(*mergedModule);
-  passes.doFinalization();
 
   _scopeRestrictionsDone = true;
 }
@@ -527,7 +526,8 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
   // Enabling internalize here would use its AllButMain variant. It
   // keeps only main if it exists and does nothing for libraries. Instead
   // we create the pass ourselves with the symbol list provided by the linker.
-  PassManagerBuilder().populateLTOPassManager(passes, /*Internalize=*/false,
+  PassManagerBuilder().populateLTOPassManager(passes,
+                                              /*Internalize=*/false,
                                               !DisableInline,
                                               DisableGVNLoadPRE);
 
@@ -547,9 +547,7 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
   }
 
   // Run our queue of passes all at once now, efficiently.
-  passes.doInitialization();
   passes.run(*mergedModule);
-  passes.doFinalization();
 
   // Run the code generator, and write assembly file
   codeGenPasses->doInitialization();
diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h
index de3d1fa8a5..508c743875 100644
--- a/tools/lto/LTOCodeGenerator.h
+++ b/tools/lto/LTOCodeGenerator.h
@@ -14,10 +14,10 @@
 #ifndef LTO_CODE_GENERATOR_H
 #define LTO_CODE_GENERATOR_H
 
-#include "llvm/Linker.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm-c/lto.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Linker.h"
 #include <string>
 #include <vector>
 
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index cb8a4e5f0d..b864f25056 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -13,21 +13,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "LTOModule.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/Constants.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
-#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/CodeGen/IntrinsicLowering.h" // @LOCALMOD
 
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCTargetAsmParser.h"
 #include "llvm/MC/SubtargetFeature.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Module.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h" // @LOCALMOD
 #include "llvm/Support/Host.h"
@@ -37,8 +38,7 @@
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/system_error.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 
 static cl::opt<bool>
@@ -345,14 +345,17 @@ const char *LTOModule::getSOName() {
 }
 
 const char* LTOModule::getLibraryDep(uint32_t index) {
+  /* make it compile until we bring back deplibs
   const Module::LibraryListType &Libs = _module->getLibraries();
   if (index < Libs.size())
     return Libs[index].c_str();
+  */
   return NULL;
 }
 
 uint32_t LTOModule::getNumLibraryDeps() {
-  return _module->getLibraries().size();
+  //return _module->getLibraries().size();
+  return 0;
 }
 // @LOCALMOD-END
 
diff --git a/tools/lto/LTOModule.h b/tools/lto/LTOModule.h
index 03c16d08db..7970faa216 100644
--- a/tools/lto/LTOModule.h
+++ b/tools/lto/LTOModule.h
@@ -14,15 +14,15 @@
 #ifndef LTO_MODULE_H
 #define LTO_MODULE_H
 
-#include "llvm/Module.h"
+#include "llvm-c/lto.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/Module.h"
 #include "llvm/Target/Mangler.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm-c/lto.h"
-#include <vector>
 #include <string>
+#include <vector>
 
 // Forward references to llvm classes.
 namespace llvm {
diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp
index a7c335c934..222cfba5c4 100644
--- a/tools/lto/lto.cpp
+++ b/tools/lto/lto.cpp
@@ -13,12 +13,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm-c/lto.h"
-#include "llvm-c/Core.h"
-
 #include "llvm/Support/CommandLine.h" // @LOCALMOD
 
-#include "LTOModule.h"
 #include "LTOCodeGenerator.h"
+#include "LTOModule.h"
+#include "llvm-c/Core.h"
 
 
 // Holds most recent error string.
diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports
index 024951f88c..10d2fe03f6 100644
--- a/tools/lto/lto.exports
+++ b/tools/lto/lto.exports
@@ -46,6 +46,7 @@ lto_codegen_set_symbol_needed
 lto_codegen_wrap_symbol_in_merged_module
 lto_codegen_compile_to_file
 LLVMCreateDisasm
+LLVMCreateDisasmCPU
 LLVMDisasmDispose
 LLVMDisasmInstruction
 LLVMSetDisasmOptions
diff --git a/tools/opt/AnalysisWrappers.cpp b/tools/opt/AnalysisWrappers.cpp
index a2b57bb3e1..6ba0fb00f6 100644
--- a/tools/opt/AnalysisWrappers.cpp
+++ b/tools/opt/AnalysisWrappers.cpp
@@ -17,10 +17,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CallSite.h"
-#include "llvm/Analysis/CallGraph.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
diff --git a/tools/opt/GraphPrinters.cpp b/tools/opt/GraphPrinters.cpp
index 30361f501c..472fe07f21 100644
--- a/tools/opt/GraphPrinters.cpp
+++ b/tools/opt/GraphPrinters.cpp
@@ -14,12 +14,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/GraphWriter.h"
-#include "llvm/Pass.h"
-#include "llvm/Value.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Value.h"
 using namespace llvm;
 
 template<typename GraphType>
diff --git a/tools/opt/PrintSCC.cpp b/tools/opt/PrintSCC.cpp
index 11efdcdfd2..be45ac1e4d 100644
--- a/tools/opt/PrintSCC.cpp
+++ b/tools/opt/PrintSCC.cpp
@@ -25,12 +25,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Pass.h"
-#include "llvm/Module.h"
+#include "llvm/ADT/SCCIterator.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SCCIterator.h"
 using namespace llvm;
 
 namespace {
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 6d9787513a..ccd4905f87 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -13,39 +13,39 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/LLVMContext.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/CodeGen/CommandFlags.h"
 #include "llvm/DataLayout.h"
 #include "llvm/DebugInfo.h"
+#include "llvm/LinkAllPasses.h"
+#include "llvm/LinkAllVMCore.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
-#include "llvm/CallGraphSCCPass.h"
-#include "llvm/CodeGen/CommandFlags.h"
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/RegionPass.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Support/PassNameParser.h"
-#include "llvm/Support/Signals.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/IRReader.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PassNameParser.h"
 #include "llvm/Support/PluginLoader.h"
 #include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/Signals.h"
 #include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/TargetSelect.h"
-#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/LinkAllPasses.h"
-#include "llvm/LinkAllVMCore.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include <memory>
 #include <algorithm>
+#include <memory>
 using namespace llvm;
 
 // The OptimizationList is automatically populated with registered Passes by the
@@ -823,9 +823,7 @@ int main(int argc, char **argv) {
   cl::PrintOptionValues();
 
   // Now that we have all of the passes ready, run them.
-  Passes.doInitialization();
   Passes.run(*M.get());
-  Passes.doFinalization();
 
   // Declare success.
   if (!NoOutput || PrintBreakpoints)
diff --git a/tools/pso-stub/pso-stub.cpp b/tools/pso-stub/pso-stub.cpp
index 1fdc868499..fc6e184866 100644
--- a/tools/pso-stub/pso-stub.cpp
+++ b/tools/pso-stub/pso-stub.cpp
@@ -162,7 +162,8 @@ void TransferLibrariesNeeded(Module *M, const ObjectFile *obj) {
     StringRef path;
     it->getPath(path);
     outs() << "Adding library " << path << "\n";
-    M->addLibrary(path);
+    // TEMPORARY LOCALMOD
+    // make it compile until we add back addLibrary M->addLibrary(path);
   }
 }
 
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp
index 117b8204b9..ff350a7872 100644
--- a/unittests/ADT/APFloatTest.cpp
+++ b/unittests/ADT/APFloatTest.cpp
@@ -7,14 +7,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <ostream>
-#include <string>
-#include "llvm/Support/raw_ostream.h"
-#include "gtest/gtest.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+#include <ostream>
+#include <string>
 
 using namespace llvm;
 
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index 49d7e703de..2a4c5b4c49 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <ostream>
-#include "gtest/gtest.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallString.h"
+#include "gtest/gtest.h"
+#include <ostream>
 
 using namespace llvm;
 
diff --git a/unittests/ADT/SCCIteratorTest.cpp b/unittests/ADT/SCCIteratorTest.cpp
index 00fa0665dd..92b4b317cf 100644
--- a/unittests/ADT/SCCIteratorTest.cpp
+++ b/unittests/ADT/SCCIteratorTest.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <limits.h>
-#include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/GraphTraits.h"
 #include "gtest/gtest.h"
+#include <limits.h>
 
 using namespace llvm;
 
diff --git a/unittests/ADT/SmallStringTest.cpp b/unittests/ADT/SmallStringTest.cpp
index 660ac44a8b..9398e99c91 100644
--- a/unittests/ADT/SmallStringTest.cpp
+++ b/unittests/ADT/SmallStringTest.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/SmallString.h"
-#include <stdarg.h>
+#include "gtest/gtest.h"
 #include <climits>
 #include <cstring>
+#include <stdarg.h>
 
 using namespace llvm;
 
diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp
index 7fd71f5eb0..90c7982699 100644
--- a/unittests/ADT/SmallVectorTest.cpp
+++ b/unittests/ADT/SmallVectorTest.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Compiler.h"
-#include <stdarg.h>
+#include "gtest/gtest.h"
 #include <list>
+#include <stdarg.h>
 
 using namespace llvm;
 
diff --git a/unittests/ADT/StringRefTest.cpp b/unittests/ADT/StringRefTest.cpp
index ead372f365..fa87cd0e2c 100644
--- a/unittests/ADT/StringRefTest.cpp
+++ b/unittests/ADT/StringRefTest.cpp
@@ -7,11 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
 using namespace llvm;
 
 namespace llvm {
diff --git a/unittests/ADT/TinyPtrVectorTest.cpp b/unittests/ADT/TinyPtrVectorTest.cpp
index 05dd797e01..a4f92ffbe3 100644
--- a/unittests/ADT/TinyPtrVectorTest.cpp
+++ b/unittests/ADT/TinyPtrVectorTest.cpp
@@ -11,12 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
+#include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/type_traits.h"
+#include "gtest/gtest.h"
 #include <algorithm>
 #include <list>
 #include <vector>
@@ -157,7 +157,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveCtorTest) {
   this->expectValues(Copy2, this->testArray(42));
   this->expectValues(this->V2, this->testArray(0));
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   TypeParam Move(std::move(Copy2));
   this->expectValues(Move, this->testArray(42));
   this->expectValues(Copy2, this->testArray(0));
@@ -168,7 +168,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(0));
   this->expectValues(this->V2, this->testArray(0));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(0));
 #endif
@@ -177,7 +177,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(0));
   this->expectValues(this->V2, this->testArray(0));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(1), this->testArray(0));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(0));
@@ -187,7 +187,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(0));
   this->expectValues(this->V2, this->testArray(0));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(2), this->testArray(0));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(0));
@@ -197,7 +197,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(0));
   this->expectValues(this->V2, this->testArray(0));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(42), this->testArray(0));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(0));
@@ -207,7 +207,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(1));
   this->expectValues(this->V2, this->testArray(1));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(0), this->testArray(1));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(1));
@@ -217,7 +217,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(2));
   this->expectValues(this->V2, this->testArray(2));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(0), this->testArray(2));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(2));
@@ -227,7 +227,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(42));
   this->expectValues(this->V2, this->testArray(42));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(0), this->testArray(42));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(42));
@@ -237,7 +237,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(1));
   this->expectValues(this->V2, this->testArray(1));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(1));
 #endif
@@ -246,7 +246,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(2));
   this->expectValues(this->V2, this->testArray(2));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(1), this->testArray(2));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(2));
@@ -256,7 +256,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(42));
   this->expectValues(this->V2, this->testArray(42));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(1), this->testArray(42));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(42));
@@ -266,7 +266,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(1));
   this->expectValues(this->V2, this->testArray(1));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(2), this->testArray(1));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(1));
@@ -276,7 +276,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(2));
   this->expectValues(this->V2, this->testArray(2));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(2), this->testArray(2));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(2));
@@ -286,7 +286,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(42));
   this->expectValues(this->V2, this->testArray(42));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(2), this->testArray(42));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(42));
@@ -296,7 +296,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(1));
   this->expectValues(this->V2, this->testArray(1));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(42), this->testArray(1));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(1));
@@ -306,7 +306,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(2));
   this->expectValues(this->V2, this->testArray(2));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(42), this->testArray(2));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(2));
@@ -316,7 +316,7 @@ TYPED_TEST(TinyPtrVectorTest, CopyAndMoveTest) {
   this->V = this->V2;
   this->expectValues(this->V, this->testArray(42));
   this->expectValues(this->V2, this->testArray(42));
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   this->setVectors(this->testArray(42), this->testArray(42));
   this->V = std::move(this->V2);
   this->expectValues(this->V, this->testArray(42));
diff --git a/unittests/ADT/TwineTest.cpp b/unittests/ADT/TwineTest.cpp
index e9cc41d13f..39d3b561b6 100644
--- a/unittests/ADT/TwineTest.cpp
+++ b/unittests/ADT/TwineTest.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
 using namespace llvm;
 
 namespace {
diff --git a/unittests/ADT/ilistTest.cpp b/unittests/ADT/ilistTest.cpp
index 09a699a962..83eaa31981 100644
--- a/unittests/ADT/ilistTest.cpp
+++ b/unittests/ADT/ilistTest.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <ostream>
-#include "gtest/gtest.h"
 #include "llvm/ADT/ilist.h"
 #include "llvm/ADT/ilist_node.h"
+#include "gtest/gtest.h"
+#include <ostream>
 
 using namespace llvm;
 
diff --git a/unittests/Analysis/ScalarEvolutionTest.cpp b/unittests/Analysis/ScalarEvolutionTest.cpp
index c30492a5f0..1820345f85 100644
--- a/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -8,13 +8,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/GlobalVariable.h"
 #include "llvm/Constants.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
-#include "llvm/ADT/SmallVector.h"
 #include "gtest/gtest.h"
 
 namespace llvm {
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index 84bd44439e..e2b7563a28 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -9,6 +9,7 @@ add_subdirectory(ADT)
 add_subdirectory(Analysis)
 add_subdirectory(ExecutionEngine)
 add_subdirectory(Bitcode)
+add_subdirectory(Option)
 add_subdirectory(Support)
 add_subdirectory(Transforms)
 add_subdirectory(VMCore)
diff --git a/unittests/ExecutionEngine/ExecutionEngineTest.cpp b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
index 74a2ccdd06..04fbb7e39b 100644
--- a/unittests/ExecutionEngine/ExecutionEngineTest.cpp
+++ b/unittests/ExecutionEngine/ExecutionEngineTest.cpp
@@ -7,12 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ExecutionEngine/Interpreter.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
index 333888a565..edaf4ba1d1 100644
--- a/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
@@ -8,15 +8,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ExecutionEngine/JITEventListener.h"
-
-#include "llvm/LLVMContext.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/TypeBuilder.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/CodeGen/MachineCodeInfo.h"
 #include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/Support/TargetSelect.h"
+#include "llvm/TypeBuilder.h"
 #include "gtest/gtest.h"
 #include <vector>
 
diff --git a/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h b/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h
index 5f02b38847..815164678b 100644
--- a/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h
+++ b/unittests/ExecutionEngine/JIT/JITEventListenerTestCommon.h
@@ -10,24 +10,22 @@
 #ifndef JIT_EVENT_LISTENER_TEST_COMMON_H
 #define JIT_EVENT_LISTENER_TEST_COMMON_H
 
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/Config/config.h"
 #include "llvm/DIBuilder.h"
 #include "llvm/DebugInfo.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
-#include "llvm/TypeBuilder.h"
-#include "llvm/CodeGen/MachineCodeInfo.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/TargetSelect.h"
-#include "llvm/Config/config.h"
-
+#include "llvm/TypeBuilder.h"
 #include "gtest/gtest.h"
-
-#include <vector>
 #include <string>
 #include <utility>
+#include <vector>
 
 typedef std::vector<std::pair<std::string, unsigned int> > SourceLocations;
 typedef std::map<uint64_t, SourceLocations> NativeCodeMap;
diff --git a/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp b/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
index be5d152c1c..2741e02b37 100644
--- a/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
@@ -7,14 +7,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/ADT/ArrayRef.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 6e54449beb..3e883ddb26 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -7,28 +7,27 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Assembly/Parser.h"
 #include "llvm/BasicBlock.h"
+#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/Constant.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
 #include "llvm/Function.h"
 #include "llvm/GlobalValue.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/TypeBuilder.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Assembly/Parser.h"
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/ExecutionEngine/JITMemoryManager.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetSelect.h"
-
+#include "llvm/Type.h"
+#include "llvm/TypeBuilder.h"
 #include "gtest/gtest.h"
 #include <vector>
 
diff --git a/unittests/ExecutionEngine/JIT/MultiJITTest.cpp b/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
index 4a22e2f641..0b5190cb39 100644
--- a/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/MultiJITTest.cpp
@@ -7,13 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Module.h"
+#include "llvm/ExecutionEngine/JIT.h"
 #include "llvm/Assembly/Parser.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
-#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
 #include "llvm/Support/SourceMgr.h"
+#include "gtest/gtest.h"
 #include <vector>
 
 using namespace llvm;
diff --git a/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp
index 9b0ee60992..7057fcaf1d 100644
--- a/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/OProfileJITEventListenerTest.cpp
@@ -7,12 +7,11 @@
 //
 //===--------------------------------------------------------------------------------------===//
 
-#include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/ExecutionEngine/OProfileWrapper.h"
 #include "JITEventListenerTestCommon.h"
-
-#include <map>
+#include "llvm/ExecutionEngine/JITEventListener.h"
 #include <list>
+#include <map>
 
 using namespace llvm;
 
diff --git a/unittests/ExecutionEngine/MCJIT/CMakeLists.txt b/unittests/ExecutionEngine/MCJIT/CMakeLists.txt
index 3e9c5b631e..c6b1f77e3e 100644
--- a/unittests/ExecutionEngine/MCJIT/CMakeLists.txt
+++ b/unittests/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -2,14 +2,14 @@ set(LLVM_LINK_COMPONENTS
   asmparser
   bitreader
   bitwriter
-  mcjit
   jit
+  mcjit
   nativecodegen
   )
 
 set(MCJITTestsSources
   MCJITTest.cpp
-  SectionMemoryManager.cpp
+  MCJITMemoryManagerTest.cpp
   )
 
 if(MSVC)
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITMemoryManagerTest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITMemoryManagerTest.cpp
new file mode 100644
index 0000000000..ab09acad0d
--- /dev/null
+++ b/unittests/ExecutionEngine/MCJIT/MCJITMemoryManagerTest.cpp
@@ -0,0 +1,172 @@
+//===- MCJITMemoryManagerTest.cpp - Unit tests for the JIT memory manager -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(MCJITMemoryManagerTest, BasicAllocations) {
+  OwningPtr<SectionMemoryManager> MemMgr(new SectionMemoryManager());
+
+  uint8_t *code1 = MemMgr->allocateCodeSection(256, 0, 1);
+  uint8_t *data1 = MemMgr->allocateDataSection(256, 0, 2, true);
+  uint8_t *code2 = MemMgr->allocateCodeSection(256, 0, 3);
+  uint8_t *data2 = MemMgr->allocateDataSection(256, 0, 4, false);
+
+  EXPECT_NE((uint8_t*)0, code1);
+  EXPECT_NE((uint8_t*)0, code2);
+  EXPECT_NE((uint8_t*)0, data1);
+  EXPECT_NE((uint8_t*)0, data2);
+
+  // Initialize the data
+  for (unsigned i = 0; i < 256; ++i) {
+    code1[i] = 1;
+    code2[i] = 2;
+    data1[i] = 3;
+    data2[i] = 4;
+  }
+
+  // Verify the data (this is checking for overlaps in the addresses)
+  for (unsigned i = 0; i < 256; ++i) {
+    EXPECT_EQ(1, code1[i]);
+    EXPECT_EQ(2, code2[i]);
+    EXPECT_EQ(3, data1[i]);
+    EXPECT_EQ(4, data2[i]);
+  }
+
+  std::string Error;
+  EXPECT_FALSE(MemMgr->applyPermissions(&Error));
+}
+
+TEST(MCJITMemoryManagerTest, LargeAllocations) {
+  OwningPtr<SectionMemoryManager> MemMgr(new SectionMemoryManager());
+
+  uint8_t *code1 = MemMgr->allocateCodeSection(0x100000, 0, 1);
+  uint8_t *data1 = MemMgr->allocateDataSection(0x100000, 0, 2, true);
+  uint8_t *code2 = MemMgr->allocateCodeSection(0x100000, 0, 3);
+  uint8_t *data2 = MemMgr->allocateDataSection(0x100000, 0, 4, false);
+
+  EXPECT_NE((uint8_t*)0, code1);
+  EXPECT_NE((uint8_t*)0, code2);
+  EXPECT_NE((uint8_t*)0, data1);
+  EXPECT_NE((uint8_t*)0, data2);
+
+  // Initialize the data
+  for (unsigned i = 0; i < 0x100000; ++i) {
+    code1[i] = 1;
+    code2[i] = 2;
+    data1[i] = 3;
+    data2[i] = 4;
+  }
+
+  // Verify the data (this is checking for overlaps in the addresses)
+  for (unsigned i = 0; i < 0x100000; ++i) {
+    EXPECT_EQ(1, code1[i]);
+    EXPECT_EQ(2, code2[i]);
+    EXPECT_EQ(3, data1[i]);
+    EXPECT_EQ(4, data2[i]);
+  }
+
+  std::string Error;
+  EXPECT_FALSE(MemMgr->applyPermissions(&Error));
+}
+
+TEST(MCJITMemoryManagerTest, ManyAllocations) {
+  OwningPtr<SectionMemoryManager> MemMgr(new SectionMemoryManager());
+
+  uint8_t* code[10000];
+  uint8_t* data[10000];
+
+  for (unsigned i = 0; i < 10000; ++i) {
+    const bool isReadOnly = i % 2 == 0;
+
+    code[i] = MemMgr->allocateCodeSection(32, 0, 1);
+    data[i] = MemMgr->allocateDataSection(32, 0, 2, isReadOnly);
+
+    for (unsigned j = 0; j < 32; j++) {
+      code[i][j] = 1 + (i % 254);
+      data[i][j] = 2 + (i % 254);
+    }
+
+    EXPECT_NE((uint8_t *)0, code[i]);
+    EXPECT_NE((uint8_t *)0, data[i]);
+  }
+
+  // Verify the data (this is checking for overlaps in the addresses)
+  for (unsigned i = 0; i < 10000; ++i) {
+    for (unsigned j = 0; j < 32;j++ ) {
+      uint8_t ExpectedCode = 1 + (i % 254);
+      uint8_t ExpectedData = 2 + (i % 254);
+      EXPECT_EQ(ExpectedCode, code[i][j]);
+      EXPECT_EQ(ExpectedData, data[i][j]);
+    }
+  }
+
+  std::string Error;
+  EXPECT_FALSE(MemMgr->applyPermissions(&Error));
+}
+
+TEST(MCJITMemoryManagerTest, ManyVariedAllocations) {
+  OwningPtr<SectionMemoryManager> MemMgr(new SectionMemoryManager());
+
+  uint8_t* code[10000];
+  uint8_t* data[10000];
+
+  for (unsigned i = 0; i < 10000; ++i) {
+    uintptr_t CodeSize = i % 16 + 1;
+    uintptr_t DataSize = i % 8 + 1;
+
+    bool isReadOnly = i % 3 == 0;
+    unsigned Align = 8 << (i % 4);
+
+    code[i] = MemMgr->allocateCodeSection(CodeSize, Align, i);
+    data[i] = MemMgr->allocateDataSection(DataSize, Align, i + 10000,
+                                          isReadOnly);
+
+    for (unsigned j = 0; j < CodeSize; j++) {
+      code[i][j] = 1 + (i % 254);
+    }
+
+    for (unsigned j = 0; j < DataSize; j++) {
+      data[i][j] = 2 + (i % 254);
+    }
+
+    EXPECT_NE((uint8_t *)0, code[i]);
+    EXPECT_NE((uint8_t *)0, data[i]);
+
+    uintptr_t CodeAlign = Align ? (uintptr_t)code[i] % Align : 0;
+    uintptr_t DataAlign = Align ? (uintptr_t)data[i] % Align : 0;
+
+    EXPECT_EQ((uintptr_t)0, CodeAlign);
+    EXPECT_EQ((uintptr_t)0, DataAlign);
+  }
+
+  for (unsigned i = 0; i < 10000; ++i) {
+    uintptr_t CodeSize = i % 16 + 1;
+    uintptr_t DataSize = i % 8 + 1;
+
+    for (unsigned j = 0; j < CodeSize; j++) {
+      uint8_t ExpectedCode = 1 + (i % 254);
+      EXPECT_EQ(ExpectedCode, code[i][j]);
+    }
+
+    for (unsigned j = 0; j < DataSize; j++) {
+      uint8_t ExpectedData = 2 + (i % 254);
+      EXPECT_EQ(ExpectedData, data[i][j]); 
+    }
+  }
+}
+
+} // Namespace
+
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
index 6b79a683bc..e9cf904b18 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
+++ b/unittests/ExecutionEngine/MCJIT/MCJITTest.cpp
@@ -14,7 +14,6 @@
 
 #include "llvm/ExecutionEngine/MCJIT.h"
 #include "MCJITTestBase.h"
-#include "SectionMemoryManager.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
@@ -47,6 +46,7 @@ TEST_F(MCJITTest, global_variable) {
   GlobalValue *Global = insertGlobalInt32(M.get(), "test_global", initialValue);
   createJIT(M.take());
   void *globalPtr =  TheJIT->getPointerToGlobal(Global);
+  MM->applyPermissions();
   static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
   EXPECT_TRUE(0 != globalPtr)
     << "Unable to get pointer to global value from JIT";
@@ -61,6 +61,7 @@ TEST_F(MCJITTest, add_function) {
   Function *F = insertAddFunction(M.get());
   createJIT(M.take());
   void *addPtr = TheJIT->getPointerToFunction(F);
+  MM->applyPermissions();
   static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
   EXPECT_TRUE(0 != addPtr)
     << "Unable to get pointer to function from JIT";
@@ -78,6 +79,7 @@ TEST_F(MCJITTest, run_main) {
   Function *Main = insertMainFunction(M.get(), 6);
   createJIT(M.take());
   void *vPtr = TheJIT->getPointerToFunction(Main);
+  MM->applyPermissions();
   static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
   EXPECT_TRUE(0 != vPtr)
     << "Unable to get pointer to main() from JIT";
@@ -100,6 +102,7 @@ TEST_F(MCJITTest, return_global) {
 
   createJIT(M.take());
   void *rgvPtr = TheJIT->getPointerToFunction(ReturnGlobal);
+  MM->applyPermissions();
   static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
   EXPECT_TRUE(0 != rgvPtr);
 
@@ -169,6 +172,7 @@ TEST_F(MCJITTest, multiple_functions) {
 
   createJIT(M.take());
   void *vPtr = TheJIT->getPointerToFunction(Outer);
+  MM->applyPermissions();
   static_cast<SectionMemoryManager*>(MM)->invalidateInstructionCache();
   EXPECT_TRUE(0 != vPtr)
     << "Unable to get pointer to outer function from JIT";
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h b/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
index 9b4a4ac3cf..006bbbe528 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
+++ b/unittests/ExecutionEngine/MCJIT/MCJITTestBase.h
@@ -17,10 +17,11 @@
 #ifndef MCJIT_TEST_BASE_H
 #define MCJIT_TEST_BASE_H
 
-#include "llvm/ADT/Triple.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Config/config.h"
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/Function.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/LLVMContext.h"
@@ -30,8 +31,6 @@
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/TypeBuilder.h"
 
-#include "SectionMemoryManager.h"
-
 // Used to skip tests on unsupported architectures and operating systems.
 // To skip a test, add this macro at the top of a test-case in a suite that
 // inherits from MCJITTestBase. See MCJITTest.cpp for examples.
diff --git a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
deleted file mode 100644
index 225106ecab..0000000000
--- a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
+++ /dev/null
@@ -1,144 +0,0 @@
-//===-- SectionMemoryManager.cpp - The memory manager for MCJIT -----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the implementation of the section-based memory manager
-// used by MCJIT.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"
-#include "llvm/Support/DynamicLibrary.h"
-#include "llvm/Support/MathExtras.h"
-
-#include "SectionMemoryManager.h"
-
-#ifdef __linux__
-// These includes used by SectionMemoryManager::getPointerToNamedFunction()
-// for Glibc trickery. Look comments in this function for more information.
-#ifdef HAVE_SYS_STAT_H
-#include <sys/stat.h>
-#endif
-#include <fcntl.h>
-#include <unistd.h>
-#endif
-
-namespace llvm {
-
-uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size,
-                                                    unsigned Alignment,
-                                                    unsigned SectionID,
-                                                    bool IsReadOnly) {
-  if (!Alignment)
-    Alignment = 16;
-  // Ensure that enough memory is requested to allow aligning.
-  size_t NumElementsAligned = 1 + (Size + Alignment - 1)/Alignment;
-  uint8_t *Addr = (uint8_t*)calloc(NumElementsAligned, Alignment);
-
-  // Honour the alignment requirement.
-  uint8_t *AlignedAddr = (uint8_t*)RoundUpToAlignment((uint64_t)Addr, Alignment);
-
-  // Store the original address from calloc so we can free it later.
-  AllocatedDataMem.push_back(sys::MemoryBlock(Addr, NumElementsAligned*Alignment));
-  return AlignedAddr;
-}
-
-uint8_t *SectionMemoryManager::allocateCodeSection(uintptr_t Size,
-                                                    unsigned Alignment,
-                                                    unsigned SectionID) {
-  if (!Alignment)
-    Alignment = 16;
-  unsigned NeedAllocate = Alignment * ((Size + Alignment - 1)/Alignment + 1);
-  uintptr_t Addr = 0;
-  // Look in the list of free code memory regions and use a block there if one
-  // is available.
-  for (int i = 0, e = FreeCodeMem.size(); i != e; ++i) {
-    sys::MemoryBlock &MB = FreeCodeMem[i];
-    if (MB.size() >= NeedAllocate) {
-      Addr = (uintptr_t)MB.base();
-      uintptr_t EndOfBlock = Addr + MB.size();
-      // Align the address.
-      Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
-      // Store cutted free memory block.
-      FreeCodeMem[i] = sys::MemoryBlock((void*)(Addr + Size),
-                                        EndOfBlock - Addr - Size);
-      return (uint8_t*)Addr;
-    }
-  }
-
-  // No pre-allocated free block was large enough. Allocate a new memory region.
-  sys::MemoryBlock MB = sys::Memory::AllocateRWX(NeedAllocate, 0, 0);
-
-  AllocatedCodeMem.push_back(MB);
-  Addr = (uintptr_t)MB.base();
-  uintptr_t EndOfBlock = Addr + MB.size();
-  // Align the address.
-  Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
-  // The AllocateRWX may allocate much more memory than we need. In this case,
-  // we store the unused memory as a free memory block.
-  unsigned FreeSize = EndOfBlock-Addr-Size;
-  if (FreeSize > 16)
-    FreeCodeMem.push_back(sys::MemoryBlock((void*)(Addr + Size), FreeSize));
-
-  // Return aligned address
-  return (uint8_t*)Addr;
-}
-
-void SectionMemoryManager::invalidateInstructionCache() {
-  for (int i = 0, e = AllocatedCodeMem.size(); i != e; ++i)
-    sys::Memory::InvalidateInstructionCache(AllocatedCodeMem[i].base(),
-                                            AllocatedCodeMem[i].size());
-}
-
-void *SectionMemoryManager::getPointerToNamedFunction(const std::string &Name,
-                                                       bool AbortOnFailure) {
-#if defined(__linux__)
-  //===--------------------------------------------------------------------===//
-  // Function stubs that are invoked instead of certain library calls
-  //
-  // Force the following functions to be linked in to anything that uses the
-  // JIT. This is a hack designed to work around the all-too-clever Glibc
-  // strategy of making these functions work differently when inlined vs. when
-  // not inlined, and hiding their real definitions in a separate archive file
-  // that the dynamic linker can't see. For more info, search for
-  // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
-  if (Name == "stat") return (void*)(intptr_t)&stat;
-  if (Name == "fstat") return (void*)(intptr_t)&fstat;
-  if (Name == "lstat") return (void*)(intptr_t)&lstat;
-  if (Name == "stat64") return (void*)(intptr_t)&stat64;
-  if (Name == "fstat64") return (void*)(intptr_t)&fstat64;
-  if (Name == "lstat64") return (void*)(intptr_t)&lstat64;
-  if (Name == "atexit") return (void*)(intptr_t)&atexit;
-  if (Name == "mknod") return (void*)(intptr_t)&mknod;
-#endif // __linux__
-
-  const char *NameStr = Name.c_str();
-  void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
-  if (Ptr) return Ptr;
-
-  // If it wasn't found and if it starts with an underscore ('_') character,
-  // try again without the underscore.
-  if (NameStr[0] == '_') {
-    Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
-    if (Ptr) return Ptr;
-  }
-
-  if (AbortOnFailure)
-    report_fatal_error("Program used external function '" + Name +
-                      "' which could not be resolved!");
-  return 0;
-}
-
-SectionMemoryManager::~SectionMemoryManager() {
-  for (unsigned i = 0, e = AllocatedCodeMem.size(); i != e; ++i)
-    sys::Memory::ReleaseRWX(AllocatedCodeMem[i]);
-  for (unsigned i = 0, e = AllocatedDataMem.size(); i != e; ++i)
-    free(AllocatedDataMem[i].base());
-}
-
-} // namespace llvm
diff --git a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h b/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h
deleted file mode 100644
index 968ee63ffd..0000000000
--- a/unittests/ExecutionEngine/MCJIT/SectionMemoryManager.h
+++ /dev/null
@@ -1,120 +0,0 @@
-//===-- SectionMemoryManager.h - Memory allocator for MCJIT -----*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of a section-based memory manager used by
-// the MCJIT execution engine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
-#define LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ExecutionEngine/JITMemoryManager.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Memory.h"
-
-namespace llvm {
-
-// Section-based memory manager for MCJIT
-class SectionMemoryManager : public JITMemoryManager {
-
-public:
-
-  SectionMemoryManager() { }
-  ~SectionMemoryManager();
-
-  virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
-                                       unsigned SectionID);
-
-  virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
-                                       unsigned SectionID, bool IsReadOnly);
-
-  virtual bool applyPermissions(std::string *ErrMsg) { return false; }
-
-  virtual void *getPointerToNamedFunction(const std::string &Name,
-                                          bool AbortOnFailure = true);
-
-  // Invalidate instruction cache for code sections. Some platforms with
-  // separate data cache and instruction cache require explicit cache flush,
-  // otherwise JIT code manipulations (like resolved relocations) will get to
-  // the data cache but not to the instruction cache.
-  virtual void invalidateInstructionCache();
-
-private:
-
-  SmallVector<sys::MemoryBlock, 16> AllocatedDataMem;
-  SmallVector<sys::MemoryBlock, 16> AllocatedCodeMem;
-  SmallVector<sys::MemoryBlock, 16> FreeCodeMem;
-
-public:
-
-  ///
-  /// Functions below are not used by MCJIT, but must be implemented because
-  /// they are declared as pure virtuals in the base class.
-  ///
-
-  virtual void setMemoryWritable() {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual void setMemoryExecutable() {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual void setPoisonMemory(bool poison) {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual void AllocateGOT() {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual uint8_t *getGOTBase() const {
-    llvm_unreachable("Unexpected call!");
-    return 0;
-  }
-  virtual uint8_t *startFunctionBody(const Function *F,
-                                     uintptr_t &ActualSize){
-    llvm_unreachable("Unexpected call!");
-    return 0;
-  }
-  virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
-                                unsigned Alignment) {
-    llvm_unreachable("Unexpected call!");
-    return 0;
-  }
-  virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
-                               uint8_t *FunctionEnd) {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
-    llvm_unreachable("Unexpected call!");
-    return 0;
-  }
-  virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
-    llvm_unreachable("Unexpected call!");
-    return 0;
-  }
-  virtual void deallocateFunctionBody(void *Body) {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual uint8_t *startExceptionTable(const Function *F,
-                                       uintptr_t &ActualSize) {
-    llvm_unreachable("Unexpected call!");
-    return 0;
-  }
-  virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
-                                 uint8_t *TableEnd, uint8_t *FrameRegister) {
-    llvm_unreachable("Unexpected call!");
-  }
-  virtual void deallocateExceptionTable(void *ET) {
-    llvm_unreachable("Unexpected call!");
-  }
-};
-
-}
-
-#endif // LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
diff --git a/unittests/Option/CMakeLists.txt b/unittests/Option/CMakeLists.txt
new file mode 100644
index 0000000000..185d503912
--- /dev/null
+++ b/unittests/Option/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LLVM_LINK_COMPONENTS
+  Option
+  Support
+  )
+
+set(LLVM_TARGET_DEFINITIONS Opts.td)
+
+tablegen(LLVM Opts.inc -gen-opt-parser-defs)
+add_public_tablegen_target(OptsTestTableGen)
+
+add_llvm_unittest(OptionTests
+  OptionParsingTest.cpp
+  )
+
+add_dependencies(OptionTests OptsTestTableGen)
diff --git a/unittests/Option/OptionParsingTest.cpp b/unittests/Option/OptionParsingTest.cpp
new file mode 100644
index 0000000000..10e4be8dc1
--- /dev/null
+++ b/unittests/Option/OptionParsingTest.cpp
@@ -0,0 +1,102 @@
+//===- unittest/Support/OptionParsingTest.cpp - OptTable tests ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Option/Arg.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::opt;
+
+enum ID {
+  OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, FLAGS, PARAM, \
+              HELPTEXT, METAVAR) OPT_##ID,
+#include "Opts.inc"
+  LastOption
+#undef OPTION
+};
+
+#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE;
+#include "Opts.inc"
+#undef PREFIX
+
+static const OptTable::Info InfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, FLAGS, PARAM, \
+               HELPTEXT, METAVAR)   \
+  { PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, Option::KIND##Class, PARAM, \
+    FLAGS, OPT_##GROUP, OPT_##ALIAS },
+#include "Opts.inc"
+#undef OPTION
+};
+
+namespace {
+class TestOptTable : public OptTable {
+public:
+  TestOptTable()
+    : OptTable(InfoTable, sizeof(InfoTable) / sizeof(InfoTable[0])) {}
+};
+}
+
+const char *Args[] = {
+  "-A",
+  "-Bhi",
+  "--C=desu",
+  "-C", "bye",
+  "-D,adena",
+  "-E", "apple", "bloom",
+  "-Fblarg",
+  "-F", "42",
+  "-Gchuu", "2"
+  };
+
+TEST(Support, OptionParsing) {
+  TestOptTable T;
+  unsigned MAI, MAC;
+  InputArgList *AL = T.ParseArgs(Args, Args + (sizeof(Args) / sizeof(Args[0])), MAI, MAC);
+  
+  // Check they all exist.
+  EXPECT_TRUE(AL->hasArg(OPT_A));
+  EXPECT_TRUE(AL->hasArg(OPT_B));
+  EXPECT_TRUE(AL->hasArg(OPT_C));
+  EXPECT_TRUE(AL->hasArg(OPT_D));
+  EXPECT_TRUE(AL->hasArg(OPT_E));
+  EXPECT_TRUE(AL->hasArg(OPT_F));
+  EXPECT_TRUE(AL->hasArg(OPT_G));
+
+  // Check the values.
+  EXPECT_EQ(AL->getLastArgValue(OPT_B), "hi");
+  EXPECT_EQ(AL->getLastArgValue(OPT_C), "bye");
+  EXPECT_EQ(AL->getLastArgValue(OPT_D), "adena");
+  std::vector<std::string> Es = AL->getAllArgValues(OPT_E);
+  EXPECT_EQ(Es[0], "apple");
+  EXPECT_EQ(Es[1], "bloom");
+  EXPECT_EQ(AL->getLastArgValue(OPT_F), "42");
+  std::vector<std::string> Gs = AL->getAllArgValues(OPT_G);
+  EXPECT_EQ(Gs[0], "chuu");
+  EXPECT_EQ(Gs[1], "2");
+
+  // Check the help text.
+  std::string Help;
+  raw_string_ostream RSO(Help);
+  T.PrintHelp(RSO, "test", "title!");
+  EXPECT_NE(Help.find("-A"), std::string::npos);
+
+  // Test aliases.
+  arg_iterator Cs = AL->filtered_begin(OPT_C);
+  ASSERT_NE(Cs, AL->filtered_end());
+  EXPECT_EQ(StringRef((*Cs)->getValue()), "desu");
+  ArgStringList ASL;
+  (*Cs)->render(*AL, ASL);
+  ASSERT_EQ(ASL.size(), 2u);
+  EXPECT_EQ(StringRef(ASL[0]), "-C");
+  EXPECT_EQ(StringRef(ASL[1]), "desu");
+}
diff --git a/unittests/Option/Opts.td b/unittests/Option/Opts.td
new file mode 100644
index 0000000000..3d6242f518
--- /dev/null
+++ b/unittests/Option/Opts.td
@@ -0,0 +1,13 @@
+include "llvm/Option/OptParser.td"
+
+def A : Flag<["-"], "A">, HelpText<"The A option">;
+def B : Joined<["-"], "B">, HelpText<"The B option">, MetaVarName<"B">;
+def C : Separate<["-"], "C">, HelpText<"The C option">, MetaVarName<"C">;
+def D : CommaJoined<["-"], "D">, HelpText<"The D option">, MetaVarName<"D">;
+def E : MultiArg<["-"], "E", 2>;
+def F : JoinedOrSeparate<["-"], "F">, HelpText<"The F option">, MetaVarName<"F">;
+def G : JoinedAndSeparate<["-"], "G">, HelpText<"The G option">, MetaVarName<"G">;
+
+def Ceq : Joined<["-", "--"], "C=">, Alias<C>;
+
+def H : Flag<["-"], "H">, Flags<[HelpHidden]>;
diff --git a/unittests/Support/AlignOfTest.cpp b/unittests/Support/AlignOfTest.cpp
index a9be1c8415..b518f3b861 100644
--- a/unittests/Support/AlignOfTest.cpp
+++ b/unittests/Support/AlignOfTest.cpp
@@ -9,7 +9,6 @@
 
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Compiler.h"
-
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/Support/AllocatorTest.cpp b/unittests/Support/AllocatorTest.cpp
index 8b463c11df..cb9fa43036 100644
--- a/unittests/Support/AllocatorTest.cpp
+++ b/unittests/Support/AllocatorTest.cpp
@@ -8,7 +8,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Allocator.h"
-
 #include "gtest/gtest.h"
 #include <cstdlib>
 
diff --git a/unittests/Support/BlockFrequencyTest.cpp b/unittests/Support/BlockFrequencyTest.cpp
index 9c5bd7b893..ff66bc4e45 100644
--- a/unittests/Support/BlockFrequencyTest.cpp
+++ b/unittests/Support/BlockFrequencyTest.cpp
@@ -1,7 +1,6 @@
-#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/BlockFrequency.h"
 #include "llvm/Support/BranchProbability.h"
-
+#include "llvm/Support/DataTypes.h"
 #include "gtest/gtest.h"
 #include <climits>
 
diff --git a/unittests/Support/Casting.cpp b/unittests/Support/Casting.cpp
index ad564aa366..01583e43e2 100644
--- a/unittests/Support/Casting.cpp
+++ b/unittests/Support/Casting.cpp
@@ -10,7 +10,6 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
-
 #include "gtest/gtest.h"
 #include <cstdlib>
 
diff --git a/unittests/Support/CommandLineTest.cpp b/unittests/Support/CommandLineTest.cpp
index 13e903858a..43c8cbd123 100644
--- a/unittests/Support/CommandLineTest.cpp
+++ b/unittests/Support/CommandLineTest.cpp
@@ -9,11 +9,9 @@
 
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Config/config.h"
-
 #include "gtest/gtest.h"
-
-#include <string>
 #include <stdlib.h>
+#include <string>
 
 using namespace llvm;
 
diff --git a/unittests/Support/ConstantRangeTest.cpp b/unittests/Support/ConstantRangeTest.cpp
index 263f93c9ff..2c9a1f832e 100644
--- a/unittests/Support/ConstantRangeTest.cpp
+++ b/unittests/Support/ConstantRangeTest.cpp
@@ -9,7 +9,6 @@
 
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Instructions.h"
-
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/Support/EndianTest.cpp b/unittests/Support/EndianTest.cpp
index 6fe0247d46..00ea2ae5d9 100644
--- a/unittests/Support/EndianTest.cpp
+++ b/unittests/Support/EndianTest.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/DataTypes.h"
+#include "gtest/gtest.h"
 #include <cstdlib>
 #include <ctime>
 using namespace llvm;
diff --git a/unittests/Support/FileOutputBufferTest.cpp b/unittests/Support/FileOutputBufferTest.cpp
index edd350afcf..80d7245368 100644
--- a/unittests/Support/FileOutputBufferTest.cpp
+++ b/unittests/Support/FileOutputBufferTest.cpp
@@ -7,13 +7,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/PathV2.h"
 #include "llvm/Support/raw_ostream.h"
-
 #include "gtest/gtest.h"
 
 using namespace llvm;
@@ -27,13 +26,6 @@ using namespace llvm::sys;
   } else {}
 
 namespace {
-
-
-// NOTE: Temporarily run this test on unix only.  Once the file mapping
-// routines are ported to Windows, this conditional can be removed.
-#if LLVM_ON_UNIX
-
-
 TEST(FileOutputBuffer, Test) {
   // Create unique temporary directory for these tests
   SmallString<128> TestDirectory;
@@ -45,7 +37,7 @@ TEST(FileOutputBuffer, Test) {
     ::close(fd);
     TestDirectory = path::parent_path(TestDirectory);
   }
-     
+
   // TEST 1: Verify commit case.
   SmallString<128> File1(TestDirectory);
 	File1.append("/file1");
@@ -61,7 +53,7 @@ TEST(FileOutputBuffer, Test) {
   }
   // Verify file exists and starts with special header.
   bool MagicMatches = false;
-  ASSERT_NO_ERROR(fs::has_magic(Twine(File1), Twine("AABBCCDDEEFFGGHHIIJJ"), 
+  ASSERT_NO_ERROR(fs::has_magic(Twine(File1), Twine("AABBCCDDEEFFGGHHIIJJ"),
                                                                 MagicMatches));
   EXPECT_TRUE(MagicMatches);
   // Verify file is correct size.
@@ -82,8 +74,7 @@ TEST(FileOutputBuffer, Test) {
   // Verify file does not exist (because buffer not commited).
   bool Exists = false;
   ASSERT_NO_ERROR(fs::exists(Twine(File2), Exists));
-  EXPECT_FALSE(Exists);  
-
+  EXPECT_FALSE(Exists);
 
   // TEST 3: Verify sizing down case.
   SmallString<128> File3(TestDirectory);
@@ -100,7 +91,7 @@ TEST(FileOutputBuffer, Test) {
   }
   // Verify file exists and starts with special header.
   bool MagicMatches3 = false;
-  ASSERT_NO_ERROR(fs::has_magic(Twine(File3), Twine("AABBCCDDEEFFGGHHIIJJ"), 
+  ASSERT_NO_ERROR(fs::has_magic(Twine(File3), Twine("AABBCCDDEEFFGGHHIIJJ"),
                                                               MagicMatches3));
   EXPECT_TRUE(MagicMatches3);
   // Verify file is correct size.
@@ -108,13 +99,12 @@ TEST(FileOutputBuffer, Test) {
   ASSERT_NO_ERROR(fs::file_size(Twine(File3), File3Size));
   ASSERT_EQ(File3Size, 5000ULL);
 
-
   // TEST 4: Verify file can be made executable.
   SmallString<128> File4(TestDirectory);
 	File4.append("/file4");
   {
     OwningPtr<FileOutputBuffer> Buffer;
-    ASSERT_NO_ERROR(FileOutputBuffer::create(File4, 8192, Buffer, 
+    ASSERT_NO_ERROR(FileOutputBuffer::create(File4, 8192, Buffer,
                                               FileOutputBuffer::F_executable));
     // Start buffer with special header.
     memcpy(Buffer->getBufferStart(), "AABBCCDDEEFFGGHHIIJJ", 20);
@@ -131,7 +121,4 @@ TEST(FileOutputBuffer, Test) {
   uint32_t RemovedCount;
   ASSERT_NO_ERROR(fs::remove_all(TestDirectory.str(), RemovedCount));
 }
-
-#endif // LLVM_ON_UNIX
-
 } // anonymous namespace
diff --git a/unittests/Support/IntegersSubsetTest.cpp b/unittests/Support/IntegersSubsetTest.cpp
index 5d1dde4c37..f4298bf595 100644
--- a/unittests/Support/IntegersSubsetTest.cpp
+++ b/unittests/Support/IntegersSubsetTest.cpp
@@ -7,12 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/ADT/APInt.h"
 #include "llvm/Support/IntegersSubset.h" 
+#include "llvm/ADT/APInt.h"
 #include "llvm/Support/IntegersSubsetMapping.h"
-
 #include "gtest/gtest.h"
-
 #include <vector>
 
 using namespace llvm;
diff --git a/unittests/Support/ManagedStatic.cpp b/unittests/Support/ManagedStatic.cpp
index bfeb0a7b6f..79eb098e56 100644
--- a/unittests/Support/ManagedStatic.cpp
+++ b/unittests/Support/ManagedStatic.cpp
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/Threading.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/Threading.h"
 #ifdef HAVE_PTHREAD_H
 #include <pthread.h>
 #endif
diff --git a/unittests/Support/MemoryBufferTest.cpp b/unittests/Support/MemoryBufferTest.cpp
index 6c78cd80e8..1d9f482c51 100644
--- a/unittests/Support/MemoryBufferTest.cpp
+++ b/unittests/Support/MemoryBufferTest.cpp
@@ -13,7 +13,6 @@
 
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/ADT/OwningPtr.h"
-
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/Support/MemoryTest.cpp b/unittests/Support/MemoryTest.cpp
index fcf9aebad2..4164713fcb 100644
--- a/unittests/Support/MemoryTest.cpp
+++ b/unittests/Support/MemoryTest.cpp
@@ -9,7 +9,6 @@
 
 #include "llvm/Support/Memory.h"
 #include "llvm/Support/Process.h"
-
 #include "gtest/gtest.h"
 #include <cstdlib>
 
@@ -99,8 +98,9 @@ TEST_P(MappedMemoryTest, MultipleAllocAndRelease) {
 }
 
 TEST_P(MappedMemoryTest, BasicWrite) {
-  // This test applies only to writeable combinations
-  if (Flags && !(Flags & Memory::MF_WRITE))
+  // This test applies only to readable and writeable combinations
+  if (Flags &&
+      !((Flags & Memory::MF_READ) && (Flags & Memory::MF_WRITE)))
     return;
 
   error_code EC;
@@ -118,8 +118,9 @@ TEST_P(MappedMemoryTest, BasicWrite) {
 }
 
 TEST_P(MappedMemoryTest, MultipleWrite) {
-  // This test applies only to writeable combinations
-  if (Flags && !(Flags & Memory::MF_WRITE))
+  // This test applies only to readable and writeable combinations
+  if (Flags &&
+      !((Flags & Memory::MF_READ) && (Flags & Memory::MF_WRITE)))
     return;
   error_code EC;
   MemoryBlock M1 = Memory::allocateMappedMemory(sizeof(int), 0, Flags, EC);
diff --git a/unittests/Support/Path.cpp b/unittests/Support/Path.cpp
index 63c9ae0591..878c22796a 100644
--- a/unittests/Support/Path.cpp
+++ b/unittests/Support/Path.cpp
@@ -7,11 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/PathV2.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
-
 #include "gtest/gtest.h"
 
 using namespace llvm;
@@ -375,7 +374,7 @@ TEST_F(FileSystemTest, FileMapping) {
   
   // Unmap temp file
 
-#if LLVM_USE_RVALUE_REFERENCES
+#if LLVM_HAS_RVALUE_REFERENCES
   fs::mapped_file_region m(Twine(TempPath),
                              fs::mapped_file_region::readonly,
                              0,
diff --git a/unittests/Support/RegexTest.cpp b/unittests/Support/RegexTest.cpp
index 65b66c3eee..3577d1015e 100644
--- a/unittests/Support/RegexTest.cpp
+++ b/unittests/Support/RegexTest.cpp
@@ -7,9 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/ADT/SmallVector.h"
+#include "gtest/gtest.h"
 #include <cstring>
 
 using namespace llvm;
@@ -51,7 +51,6 @@ TEST_F(RegexTest, Basics) {
   EXPECT_EQ(1u, Matches.size());
   EXPECT_EQ(String, Matches[0].str());
 
-
   std::string NulPattern="X[0-9]+X([a-f])?:([0-9]+)";
   String="YX99a:513b";
   NulPattern[7] = '\0';
@@ -62,6 +61,28 @@ TEST_F(RegexTest, Basics) {
   EXPECT_TRUE(r5.match(String));
 }
 
+TEST_F(RegexTest, Backreferences) {
+  Regex r1("([a-z]+)_\\1");
+  SmallVector<StringRef, 4> Matches;
+  EXPECT_TRUE(r1.match("abc_abc", &Matches));
+  EXPECT_EQ(2u, Matches.size());
+  EXPECT_FALSE(r1.match("abc_ab", &Matches));
+
+  Regex r2("a([0-9])b\\1c\\1");
+  EXPECT_TRUE(r2.match("a4b4c4", &Matches));
+  EXPECT_EQ(2u, Matches.size());
+  EXPECT_EQ("4", Matches[1].str());
+  EXPECT_FALSE(r2.match("a2b2c3"));
+
+  Regex r3("a([0-9])([a-z])b\\1\\2");
+  EXPECT_TRUE(r3.match("a6zb6z", &Matches));
+  EXPECT_EQ(3u, Matches.size());
+  EXPECT_EQ("6", Matches[1].str());
+  EXPECT_EQ("z", Matches[2].str());
+  EXPECT_FALSE(r3.match("a6zb6y"));
+  EXPECT_FALSE(r3.match("a6zb7z"));
+}
+
 TEST_F(RegexTest, Substitution) {
   std::string Error;
 
diff --git a/unittests/Support/ValueHandleTest.cpp b/unittests/Support/ValueHandleTest.cpp
index 2e5e5b167c..af03e1bb35 100644
--- a/unittests/Support/ValueHandleTest.cpp
+++ b/unittests/Support/ValueHandleTest.cpp
@@ -8,14 +8,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/ValueHandle.h"
-
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/ADT/OwningPtr.h"
-
 #include "gtest/gtest.h"
-
 #include <memory>
 
 using namespace llvm;
diff --git a/unittests/Support/formatted_raw_ostream_test.cpp b/unittests/Support/formatted_raw_ostream_test.cpp
index 4725cedc21..9bb8046913 100644
--- a/unittests/Support/formatted_raw_ostream_test.cpp
+++ b/unittests/Support/formatted_raw_ostream_test.cpp
@@ -7,10 +7,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
+#include "llvm/Support/FormattedStream.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/FormattedStream.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp
index ea3d5bee78..9df5787028 100644
--- a/unittests/Transforms/Utils/Cloning.cpp
+++ b/unittests/Transforms/Utils/Cloning.cpp
@@ -7,13 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Argument.h"
 #include "llvm/Constant.h"
-#include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/STLExtras.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
diff --git a/unittests/Transforms/Utils/IntegerDivision.cpp b/unittests/Transforms/Utils/IntegerDivision.cpp
index a3211391d6..ecc997c771 100644
--- a/unittests/Transforms/Utils/IntegerDivision.cpp
+++ b/unittests/Transforms/Utils/IntegerDivision.cpp
@@ -7,13 +7,13 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
+#include "llvm/Transforms/Utils/IntegerDivision.h"
 #include "llvm/BasicBlock.h"
-#include "llvm/GlobalValue.h"
 #include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Module.h"
-#include "llvm/Transforms/Utils/IntegerDivision.h"
+#include "gtest/gtest.h"
 
 using namespace llvm;
 
diff --git a/unittests/Transforms/Utils/Local.cpp b/unittests/Transforms/Utils/Local.cpp
index 727f5ea525..cb9232cae5 100644
--- a/unittests/Transforms/Utils/Local.cpp
+++ b/unittests/Transforms/Utils/Local.cpp
@@ -7,12 +7,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/IRBuilder.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Transforms/Utils/Local.h"
-
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/VMCore/ConstantsTest.cpp b/unittests/VMCore/ConstantsTest.cpp
index 25d61cc6ca..5cbd0ce298 100644
--- a/unittests/VMCore/ConstantsTest.cpp
+++ b/unittests/VMCore/ConstantsTest.cpp
@@ -8,9 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Constants.h"
-#include "llvm/Instruction.h"
-#include "llvm/InstrTypes.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Instruction.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "gtest/gtest.h"
diff --git a/unittests/VMCore/DominatorTreeTest.cpp b/unittests/VMCore/DominatorTreeTest.cpp
index f6a90605a7..77449308a4 100644
--- a/unittests/VMCore/DominatorTreeTest.cpp
+++ b/unittests/VMCore/DominatorTreeTest.cpp
@@ -1,9 +1,9 @@
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Parser.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
 #include "llvm/PassManager.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/Assembly/Parser.h"
 #include "llvm/Support/SourceMgr.h"
 #include "gtest/gtest.h"
 
diff --git a/unittests/VMCore/IRBuilderTest.cpp b/unittests/VMCore/IRBuilderTest.cpp
index 9f26936df4..d1d59c71eb 100644
--- a/unittests/VMCore/IRBuilderTest.cpp
+++ b/unittests/VMCore/IRBuilderTest.cpp
@@ -7,16 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IRBuilder.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/DataLayout.h"
 #include "llvm/Function.h"
-#include "llvm/IRBuilder.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/MDBuilder.h"
 #include "llvm/Module.h"
-#include "llvm/ADT/OwningPtr.h"
-
 #include "gtest/gtest.h"
 
 using namespace llvm;
@@ -31,6 +30,8 @@ protected:
                                           /*isVarArg=*/false);
     F = Function::Create(FTy, Function::ExternalLinkage, "", M.get());
     BB = BasicBlock::Create(getGlobalContext(), "", F);
+    GV = new GlobalVariable(Type::getFloatTy(getGlobalContext()), true,
+                            GlobalValue::ExternalLinkage);
   }
 
   virtual void TearDown() {
@@ -41,6 +42,7 @@ protected:
   OwningPtr<Module> M;
   Function *F;
   BasicBlock *BB;
+  GlobalVariable *GV;
 };
 
 TEST_F(IRBuilderTest, Lifetime) {
@@ -108,4 +110,68 @@ TEST_F(IRBuilderTest, GetIntTy) {
   EXPECT_EQ(IntPtrTy, IntegerType::get(getGlobalContext(), IntPtrBitSize));
 }
 
+TEST_F(IRBuilderTest, FastMathFlags) {
+  IRBuilder<> Builder(BB);
+  Value *F;
+  Instruction *FDiv, *FAdd;
+
+  F = Builder.CreateLoad(GV);
+  F = Builder.CreateFAdd(F, F);
+
+  EXPECT_FALSE(Builder.getFastMathFlags().any());
+  ASSERT_TRUE(isa<Instruction>(F));
+  FAdd = cast<Instruction>(F);
+  EXPECT_FALSE(FAdd->hasNoNaNs());
+
+  FastMathFlags FMF;
+  Builder.SetFastMathFlags(FMF);
+
+  F = Builder.CreateFAdd(F, F);
+  EXPECT_FALSE(Builder.getFastMathFlags().any());
+
+  FMF.setUnsafeAlgebra();
+  Builder.SetFastMathFlags(FMF);
+
+  F = Builder.CreateFAdd(F, F);
+  EXPECT_TRUE(Builder.getFastMathFlags().any());
+  ASSERT_TRUE(isa<Instruction>(F));
+  FAdd = cast<Instruction>(F);
+  EXPECT_TRUE(FAdd->hasNoNaNs());
+
+  F = Builder.CreateFDiv(F, F);
+  EXPECT_TRUE(Builder.getFastMathFlags().any());
+  EXPECT_TRUE(Builder.getFastMathFlags().UnsafeAlgebra);
+  ASSERT_TRUE(isa<Instruction>(F));
+  FDiv = cast<Instruction>(F);
+  EXPECT_TRUE(FDiv->hasAllowReciprocal());
+
+  Builder.clearFastMathFlags();
+
+  F = Builder.CreateFDiv(F, F);
+  ASSERT_TRUE(isa<Instruction>(F));
+  FDiv = cast<Instruction>(F);
+  EXPECT_FALSE(FDiv->hasAllowReciprocal());
+
+  FMF.clear();
+  FMF.setAllowReciprocal();
+  Builder.SetFastMathFlags(FMF);
+
+  F = Builder.CreateFDiv(F, F);
+  EXPECT_TRUE(Builder.getFastMathFlags().any());
+  EXPECT_TRUE(Builder.getFastMathFlags().AllowReciprocal);
+  ASSERT_TRUE(isa<Instruction>(F));
+  FDiv = cast<Instruction>(F);
+  EXPECT_TRUE(FDiv->hasAllowReciprocal());
+
+  Builder.clearFastMathFlags();
+
+  F = Builder.CreateFDiv(F, F);
+  ASSERT_TRUE(isa<Instruction>(F));
+  FDiv = cast<Instruction>(F);
+  EXPECT_FALSE(FDiv->getFastMathFlags().any());
+  FDiv->copyFastMathFlags(FAdd);
+  EXPECT_TRUE(FDiv->hasNoNaNs());
+
+}
+
 }
diff --git a/unittests/VMCore/InstructionsTest.cpp b/unittests/VMCore/InstructionsTest.cpp
index a3b13ce92d..f812812de4 100644
--- a/unittests/VMCore/InstructionsTest.cpp
+++ b/unittests/VMCore/InstructionsTest.cpp
@@ -7,17 +7,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Instructions.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/BasicBlock.h"
 #include "llvm/Constants.h"
 #include "llvm/DataLayout.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/IRBuilder.h"
-#include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/MDBuilder.h"
 #include "llvm/Operator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Analysis/ValueTracking.h"
 #include "gtest/gtest.h"
 
 namespace llvm {
diff --git a/unittests/VMCore/MDBuilderTest.cpp b/unittests/VMCore/MDBuilderTest.cpp
index 847039b837..11d9872bf0 100644
--- a/unittests/VMCore/MDBuilderTest.cpp
+++ b/unittests/VMCore/MDBuilderTest.cpp
@@ -7,10 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IRBuilder.h"
 #include "llvm/MDBuilder.h"
+#include "llvm/IRBuilder.h"
 #include "llvm/Operator.h"
-
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/VMCore/MetadataTest.cpp b/unittests/VMCore/MetadataTest.cpp
index 08927a2ff5..a740d5330a 100644
--- a/unittests/VMCore/MetadataTest.cpp
+++ b/unittests/VMCore/MetadataTest.cpp
@@ -7,15 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "gtest/gtest.h"
+#include "llvm/Metadata.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
 #include "llvm/Module.h"
-#include "llvm/Type.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Type.h"
+#include "gtest/gtest.h"
 using namespace llvm;
 
 namespace {
diff --git a/unittests/VMCore/PassManagerTest.cpp b/unittests/VMCore/PassManagerTest.cpp
index 9c070c84bb..6cfc0369a4 100644
--- a/unittests/VMCore/PassManagerTest.cpp
+++ b/unittests/VMCore/PassManagerTest.cpp
@@ -7,29 +7,28 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Module.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/PassManager.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Pass.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/BasicBlock.h"
 #include "llvm/CallGraphSCCPass.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
 #include "llvm/DataLayout.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Constants.h"
-#include "llvm/GlobalVariable.h"
 #include "llvm/Function.h"
-#include "llvm/CallingConv.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/PassManager.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/Verifier.h"
-#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Support/raw_ostream.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
@@ -148,6 +147,10 @@ namespace llvm {
     template<typename T, typename P>
     struct PassTest : public PassTestBase<P> {
     public:
+#ifndef _MSC_VER // MSVC complains that Pass is not base class.
+      using llvm::Pass::doInitialization;
+      using llvm::Pass::doFinalization;
+#endif
       virtual bool doInitialization(T &t) {
         EXPECT_FALSE(PassTestBase<P>::initialized);
         PassTestBase<P>::initialized = true;
@@ -198,6 +201,8 @@ namespace llvm {
         EXPECT_EQ(run, initcount);
         EXPECT_EQ(finalized, fincount);
       }
+      using llvm::Pass::doInitialization;
+      using llvm::Pass::doFinalization;
       virtual bool doInitialization(Loop* L, LPPassManager &LPM) {
         initialized = true;
         initcount++;
@@ -426,7 +431,7 @@ namespace llvm {
         /*Linkage=*/GlobalValue::ExternalLinkage,
         /*Name=*/"test1", mod);
       func_test1->setCallingConv(CallingConv::C);
-      AttrListPtr func_test1_PAL;
+      AttributeSet func_test1_PAL;
       func_test1->setAttributes(func_test1_PAL);
 
       Function* func_test2 = Function::Create(
@@ -434,7 +439,7 @@ namespace llvm {
         /*Linkage=*/GlobalValue::ExternalLinkage,
         /*Name=*/"test2", mod);
       func_test2->setCallingConv(CallingConv::C);
-      AttrListPtr func_test2_PAL;
+      AttributeSet func_test2_PAL;
       func_test2->setAttributes(func_test2_PAL);
 
       Function* func_test3 = Function::Create(
@@ -442,7 +447,7 @@ namespace llvm {
         /*Linkage=*/GlobalValue::ExternalLinkage,
         /*Name=*/"test3", mod);
       func_test3->setCallingConv(CallingConv::C);
-      AttrListPtr func_test3_PAL;
+      AttributeSet func_test3_PAL;
       func_test3->setAttributes(func_test3_PAL);
 
       Function* func_test4 = Function::Create(
@@ -450,7 +455,7 @@ namespace llvm {
         /*Linkage=*/GlobalValue::ExternalLinkage,
         /*Name=*/"test4", mod);
       func_test4->setCallingConv(CallingConv::C);
-      AttrListPtr func_test4_PAL;
+      AttributeSet func_test4_PAL;
       func_test4->setAttributes(func_test4_PAL);
 
       // Global Variable Declarations
@@ -470,7 +475,7 @@ namespace llvm {
         // Block entry (label_entry)
         CallInst* int32_3 = CallInst::Create(func_test2, "", label_entry);
         int32_3->setCallingConv(CallingConv::C);
-        int32_3->setTailCall(false);AttrListPtr int32_3_PAL;
+        int32_3->setTailCall(false);AttributeSet int32_3_PAL;
         int32_3->setAttributes(int32_3_PAL);
 
         ReturnInst::Create(getGlobalContext(), int32_3, label_entry);
@@ -485,7 +490,7 @@ namespace llvm {
         // Block entry (label_entry_5)
         CallInst* int32_6 = CallInst::Create(func_test3, "", label_entry_5);
         int32_6->setCallingConv(CallingConv::C);
-        int32_6->setTailCall(false);AttrListPtr int32_6_PAL;
+        int32_6->setTailCall(false);AttributeSet int32_6_PAL;
         int32_6->setAttributes(int32_6_PAL);
 
         ReturnInst::Create(getGlobalContext(), int32_6, label_entry_5);
@@ -500,7 +505,7 @@ namespace llvm {
         // Block entry (label_entry_8)
         CallInst* int32_9 = CallInst::Create(func_test1, "", label_entry_8);
         int32_9->setCallingConv(CallingConv::C);
-        int32_9->setTailCall(false);AttrListPtr int32_9_PAL;
+        int32_9->setTailCall(false);AttributeSet int32_9_PAL;
         int32_9->setAttributes(int32_9_PAL);
 
         ReturnInst::Create(getGlobalContext(), int32_9, label_entry_8);
diff --git a/unittests/VMCore/TypeBuilderTest.cpp b/unittests/VMCore/TypeBuilderTest.cpp
index a746b1f738..51face076d 100644
--- a/unittests/VMCore/TypeBuilderTest.cpp
+++ b/unittests/VMCore/TypeBuilderTest.cpp
@@ -8,9 +8,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/TypeBuilder.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/ADT/ArrayRef.h"
-
+#include "llvm/LLVMContext.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/VMCore/ValueMapTest.cpp b/unittests/VMCore/ValueMapTest.cpp
index 9bed37dff3..36d371305e 100644
--- a/unittests/VMCore/ValueMapTest.cpp
+++ b/unittests/VMCore/ValueMapTest.cpp
@@ -8,12 +8,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/ValueMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Config/llvm-config.h"
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Config/llvm-config.h"
-
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/unittests/VMCore/VerifierTest.cpp b/unittests/VMCore/VerifierTest.cpp
index 324b4e193b..872c7a4a81 100644
--- a/unittests/VMCore/VerifierTest.cpp
+++ b/unittests/VMCore/VerifierTest.cpp
@@ -7,6 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
@@ -15,8 +17,6 @@
 #include "llvm/Instructions.h"
 #include "llvm/LLVMContext.h"
 #include "llvm/Module.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/Analysis/Verifier.h"
 #include "gtest/gtest.h"
 
 namespace llvm {
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp
index c5524656f6..a0eeb0edff 100644
--- a/utils/FileCheck/FileCheck.cpp
+++ b/utils/FileCheck/FileCheck.cpp
@@ -17,18 +17,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Regex.h"
+#include "llvm/Support/Signals.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Signals.h"
 #include "llvm/Support/system_error.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringMap.h"
 #include <algorithm>
+#include <map>
+#include <string>
+#include <vector>
 using namespace llvm;
 
 static cl::opt<std::string>
@@ -73,16 +76,19 @@ class Pattern {
   /// value of bar at offset 3.
   std::vector<std::pair<StringRef, unsigned> > VariableUses;
 
-  /// VariableDefs - Entries in this vector map to definitions of a variable in
-  /// the pattern, e.g. "foo[[bar:.*]]baz".  In this case, the RegExStr will
-  /// contain "foo(.*)baz" and VariableDefs will contain the pair "bar",1.  The
-  /// index indicates what parenthesized value captures the variable value.
-  std::vector<std::pair<StringRef, unsigned> > VariableDefs;
+  /// VariableDefs - Maps definitions of variables to their parenthesized
+  /// capture numbers.
+  /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
+  std::map<StringRef, unsigned> VariableDefs;
 
 public:
 
   Pattern(bool matchEOF = false) : MatchEOF(matchEOF) { }
 
+  /// ParsePattern - Parse the given string into the Pattern.  SM provides the
+  /// SourceMgr used for error reports, and LineNumber is the line number in
+  /// the input file from which the pattern string was read.
+  /// Returns true in case of an error, false otherwise.
   bool ParsePattern(StringRef PatternStr, SourceMgr &SM, unsigned LineNumber);
 
   /// Match - Match the pattern string against the input buffer Buffer.  This
@@ -101,7 +107,8 @@ public:
 
 private:
   static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr);
-  bool AddRegExToRegEx(StringRef RegExStr, unsigned &CurParen, SourceMgr &SM);
+  bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
+  void AddBackrefToRegEx(unsigned BackrefNum);
 
   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
   /// matching this pattern at the start of \arg Buffer; a distance of zero
@@ -112,6 +119,13 @@ private:
   /// \brief Evaluates expression and stores the result to \p Value.
   /// \return true on success. false when the expression has invalid syntax.
   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
+
+  /// \brief Finds the closing sequence of a regex variable usage or
+  /// definition. Str has to point in the beginning of the definition
+  /// (right after the opening sequence).
+  /// \return offset of the closing sequence within Str, or npos if it was not
+  /// found.
+  size_t FindRegexVarEnd(StringRef Str);
 };
 
 
@@ -150,8 +164,7 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM,
   while (!PatternStr.empty()) {
     // RegEx matches.
     if (PatternStr.startswith("{{")) {
-
-      // Otherwise, this is the start of a regex match.  Scan for the }}.
+      // This is the start of a regex match.  Scan for the }}.
       size_t End = PatternStr.find("}}");
       if (End == StringRef::npos) {
         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
@@ -181,8 +194,10 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM,
     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
     // it.  This is to catch some common errors.
     if (PatternStr.startswith("[[")) {
-      // Verify that it is terminated properly.
-      size_t End = PatternStr.find("]]");
+      // Find the closing bracket pair ending the match.  End is going to be an
+      // offset relative to the beginning of the match string.
+      size_t End = FindRegexVarEnd(PatternStr.substr(2));
+
       if (End == StringRef::npos) {
         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
                         SourceMgr::DK_Error,
@@ -190,8 +205,8 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM,
         return true;
       }
 
-      StringRef MatchStr = PatternStr.substr(2, End-2);
-      PatternStr = PatternStr.substr(End+2);
+      StringRef MatchStr = PatternStr.substr(2, End);
+      PatternStr = PatternStr.substr(End+4);
 
       // Get the regex name (e.g. "foo").
       size_t NameEnd = MatchStr.find(':');
@@ -235,12 +250,25 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM,
 
       // Handle [[foo]].
       if (NameEnd == StringRef::npos) {
-        VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
+        // Handle variables that were defined earlier on the same line by
+        // emitting a backreference.
+        if (VariableDefs.find(Name) != VariableDefs.end()) {
+          unsigned VarParenNum = VariableDefs[Name];
+          if (VarParenNum < 1 || VarParenNum > 9) {
+            SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
+                            SourceMgr::DK_Error,
+                            "Can't back-reference more than 9 variables");
+            return true;
+          }
+          AddBackrefToRegEx(VarParenNum);
+        } else {
+          VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
+        }
         continue;
       }
 
       // Handle [[foo:.*]].
-      VariableDefs.push_back(std::make_pair(Name, CurParen));
+      VariableDefs[Name] = CurParen;
       RegExStr += '(';
       ++CurParen;
 
@@ -288,21 +316,28 @@ void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) {
   }
 }
 
-bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen,
+bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
                               SourceMgr &SM) {
-  Regex R(RegexStr);
+  Regex R(RS);
   std::string Error;
   if (!R.isValid(Error)) {
-    SM.PrintMessage(SMLoc::getFromPointer(RegexStr.data()), SourceMgr::DK_Error,
+    SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
                     "invalid regex: " + Error);
     return true;
   }
 
-  RegExStr += RegexStr.str();
+  RegExStr += RS.str();
   CurParen += R.getNumMatches();
   return false;
 }
 
+void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
+  assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
+  std::string Backref = std::string("\\") +
+                        std::string(1, '0' + BackrefNum);
+  RegExStr += Backref;
+}
+
 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
   // The only supported expression is @LINE([\+-]\d+)?
   if (!Expr.startswith("@LINE"))
@@ -385,10 +420,11 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
   StringRef FullMatch = MatchInfo[0];
 
   // If this defines any variables, remember their values.
-  for (unsigned i = 0, e = VariableDefs.size(); i != e; ++i) {
-    assert(VariableDefs[i].second < MatchInfo.size() &&
-           "Internal paren error");
-    VariableTable[VariableDefs[i].first] = MatchInfo[VariableDefs[i].second];
+  for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
+                                                     E = VariableDefs.end();
+       I != E; ++I) {
+    assert(I->second < MatchInfo.size() && "Internal paren error");
+    VariableTable[I->first] = MatchInfo[I->second];
   }
 
   MatchLen = FullMatch.size();
@@ -492,6 +528,40 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
   }
 }
 
+size_t Pattern::FindRegexVarEnd(StringRef Str) {
+  // Offset keeps track of the current offset within the input Str
+  size_t Offset = 0;
+  // [...] Nesting depth
+  size_t BracketDepth = 0;
+
+  while (!Str.empty()) {
+    if (Str.startswith("]]") && BracketDepth == 0)
+      return Offset;
+    if (Str[0] == '\\') {
+      // Backslash escapes the next char within regexes, so skip them both.
+      Str = Str.substr(2);
+      Offset += 2;
+    } else {
+      switch (Str[0]) {
+        default:
+          break;
+        case '[':
+          BracketDepth++;
+          break;
+        case ']':
+          assert(BracketDepth > 0 && "Invalid regex");
+          BracketDepth--;
+          break;
+      }
+      Str = Str.substr(1);
+      Offset++;
+    }
+  }
+
+  return StringRef::npos;
+}
+
+
 //===----------------------------------------------------------------------===//
 // Check Strings.
 //===----------------------------------------------------------------------===//
@@ -554,9 +624,9 @@ static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) {
 
 /// ReadCheckFile - Read the check file, which specifies the sequence of
 /// expected strings.  The strings are added to the CheckStrings vector.
+/// Returns true in case of an error, false otherwise.
 static bool ReadCheckFile(SourceMgr &SM,
                           std::vector<CheckString> &CheckStrings) {
-  // Open the check file, and tell SourceMgr about it.
   OwningPtr<MemoryBuffer> File;
   if (error_code ec =
         MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), File)) {
@@ -575,9 +645,10 @@ static bool ReadCheckFile(SourceMgr &SM,
 
   // Find all instances of CheckPrefix followed by : in the file.
   StringRef Buffer = F->getBuffer();
-
   std::vector<std::pair<SMLoc, Pattern> > NotMatches;
 
+  // LineNumber keeps track of the line on which CheckPrefix instances are
+  // found.
   unsigned LineNumber = 1;
 
   while (1) {
@@ -587,7 +658,6 @@ static bool ReadCheckFile(SourceMgr &SM,
     if (PrefixLoc == StringRef::npos)
       break;
 
-    // Recalculate line number.
     LineNumber += Buffer.substr(0, PrefixLoc).count('\n');
 
     Buffer = Buffer.substr(PrefixLoc);
@@ -631,7 +701,6 @@ static bool ReadCheckFile(SourceMgr &SM,
 
     Buffer = Buffer.substr(EOL);
 
-
     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
     if (IsCheckNext && CheckStrings.empty()) {
       SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart),
@@ -648,7 +717,6 @@ static bool ReadCheckFile(SourceMgr &SM,
       continue;
     }
 
-
     // Okay, add the string we captured to the output vector and move on.
     CheckStrings.push_back(CheckString(P,
                                        PatternLoc,
@@ -729,13 +797,13 @@ int main(int argc, char **argv) {
         MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) {
     errs() << "Could not open input file '" << InputFilename << "': "
            << ec.message() << '\n';
-    return true;
+    return 2;
   }
   MemoryBuffer *F = File.take();
 
   if (F->getBufferSize() == 0) {
     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
-    return 1;
+    return 2;
   }
   
   // Remove duplicate spaces in the input file if requested.
diff --git a/utils/FileUpdate/FileUpdate.cpp b/utils/FileUpdate/FileUpdate.cpp
index 3ea1e4f306..9b48f94948 100644
--- a/utils/FileUpdate/FileUpdate.cpp
+++ b/utils/FileUpdate/FileUpdate.cpp
@@ -13,12 +13,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/system_error.h"
 using namespace llvm;
 
diff --git a/utils/KillTheDoctor/KillTheDoctor.cpp b/utils/KillTheDoctor/KillTheDoctor.cpp
index 70713b25bf..feba2e54f6 100644
--- a/utils/KillTheDoctor/KillTheDoctor.cpp
+++ b/utils/KillTheDoctor/KillTheDoctor.cpp
@@ -39,19 +39,22 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/type_traits.h"
 #include "llvm/Support/Signals.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/system_error.h"
+#include "llvm/Support/type_traits.h"
 #include <algorithm>
 #include <cerrno>
 #include <cstdlib>
 #include <map>
 #include <string>
+
+// These includes must be last.
 #include <Windows.h>
 #include <WinError.h>
 #include <Dbghelp.h>
 #include <psapi.h>
+
 using namespace llvm;
 
 #undef max
diff --git a/utils/PerfectShuffle/PerfectShuffle.cpp b/utils/PerfectShuffle/PerfectShuffle.cpp
index 98f8f4cc0c..d39414eede 100644
--- a/utils/PerfectShuffle/PerfectShuffle.cpp
+++ b/utils/PerfectShuffle/PerfectShuffle.cpp
@@ -14,11 +14,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <iostream>
-#include <iomanip>
-#include <vector>
 #include <cassert>
 #include <cstdlib>
+#include <iomanip>
+#include <iostream>
+#include <vector>
 struct Operator;
 
 // Masks are 4-nibble hex numbers.  Values 0-7 in any nibble means that it takes
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index ee83311c58..bc0cf4368f 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -100,9 +100,9 @@
 #include "StringToOffsetTable.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index d0416c9081..0527aa6180 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -24,6 +24,7 @@ add_tablegen(llvm-tblgen LLVM
   FixedLenDecoderEmitter.cpp
   InstrInfoEmitter.cpp
   IntrinsicEmitter.cpp
+  OptParserEmitter.cpp
   PseudoLoweringEmitter.cpp
   RegisterInfoEmitter.cpp
   SetTheory.cpp
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index 3e4f626d48..c94d384901 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -14,10 +14,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeGenTarget.h"
-#include "llvm/TableGen/Record.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
 #include <map>
 #include <string>
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index d5b581b598..72fb77150c 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -13,13 +13,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodeGenDAGPatterns.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
 #include <algorithm>
 #include <cstdio>
 #include <set>
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 9be763f2ff..424f02f7ab 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -15,15 +15,15 @@
 #ifndef CODEGEN_DAGPATTERNS_H
 #define CODEGEN_DAGPATTERNS_H
 
-#include "CodeGenTarget.h"
 #include "CodeGenIntrinsics.h"
+#include "CodeGenTarget.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/ErrorHandling.h"
-#include <set>
 #include <algorithm>
-#include <vector>
 #include <map>
+#include <set>
+#include <vector>
 
 namespace llvm {
   class Record;
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 0a8684d3da..367320498f 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -13,11 +13,11 @@
 
 #include "CodeGenInstruction.h"
 #include "CodeGenTarget.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
 #include <set>
 using namespace llvm;
 
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index 55d44399df..d1e1153554 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -14,12 +14,12 @@
 #ifndef CODEGEN_INSTRUCTION_H
 #define CODEGEN_INSTRUCTION_H
 
-#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Support/SourceMgr.h"
 #include <string>
-#include <vector>
 #include <utility>
+#include <vector>
 
 namespace llvm {
   class Record;
diff --git a/utils/TableGen/CodeGenIntrinsics.h b/utils/TableGen/CodeGenIntrinsics.h
index 6efe952ea2..f0570f95b8 100644
--- a/utils/TableGen/CodeGenIntrinsics.h
+++ b/utils/TableGen/CodeGenIntrinsics.h
@@ -14,9 +14,9 @@
 #ifndef CODEGEN_INTRINSIC_H
 #define CODEGEN_INTRINSIC_H
 
+#include "llvm/CodeGen/ValueTypes.h"
 #include <string>
 #include <vector>
-#include "llvm/CodeGen/ValueTypes.h"
 
 namespace llvm {
   class Record;
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index 580e319f24..20d439f8f7 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -14,12 +14,12 @@
 
 #include "CodeGenRegisters.h"
 #include "CodeGenTarget.h"
-#include "llvm/TableGen/Error.h"
 #include "llvm/ADT/IntEqClasses.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/TableGen/Error.h"
 
 using namespace llvm;
 
@@ -1589,6 +1589,35 @@ void CodeGenRegBank::computeRegUnitSets() {
     }
     assert(!RegClassUnitSets[RCIdx].empty() && "missing unit set for regclass");
   }
+
+  // For each register unit, ensure that we have the list of UnitSets that
+  // contain the unit. Normally, this matches an existing list of UnitSets for a
+  // register class. If not, we create a new entry in RegClassUnitSets as a
+  // "fake" register class.
+  for (unsigned UnitIdx = 0, UnitEnd = NumNativeRegUnits;
+       UnitIdx < UnitEnd; ++UnitIdx) {
+    std::vector<unsigned> RUSets;
+    for (unsigned i = 0, e = RegUnitSets.size(); i != e; ++i) {
+      RegUnitSet &RUSet = RegUnitSets[i];
+      if (std::find(RUSet.Units.begin(), RUSet.Units.end(), UnitIdx)
+          == RUSet.Units.end())
+        continue;
+      RUSets.push_back(i);
+    }
+    unsigned RCUnitSetsIdx = 0;
+    for (unsigned e = RegClassUnitSets.size();
+         RCUnitSetsIdx != e; ++RCUnitSetsIdx) {
+      if (RegClassUnitSets[RCUnitSetsIdx] == RUSets) {
+        break;
+      }
+    }
+    RegUnits[UnitIdx].RegClassUnitSetsIdx = RCUnitSetsIdx;
+    if (RCUnitSetsIdx == RegClassUnitSets.size()) {
+      // Create a new list of UnitSets as a "fake" register class.
+      RegClassUnitSets.resize(RCUnitSetsIdx + 1);
+      RegClassUnitSets[RCUnitSetsIdx].swap(RUSets);
+    }
+  }
 }
 
 void CodeGenRegBank::computeDerivedInfo() {
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index e411074156..a1921a412a 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -16,17 +16,17 @@
 #define CODEGEN_REGISTERS_H
 
 #include "SetTheory.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/TableGen/Record.h"
 #include <cstdlib>
 #include <map>
-#include <string>
 #include <set>
+#include <string>
 #include <vector>
 
 namespace llvm {
@@ -403,7 +403,11 @@ namespace llvm {
     // these two registers and their super-registers.
     const CodeGenRegister *Roots[2];
 
-    RegUnit() : Weight(0) { Roots[0] = Roots[1] = 0; }
+    // Index into RegClassUnitSets where we can find the list of UnitSets that
+    // contain this unit.
+    unsigned RegClassUnitSetsIdx;
+
+    RegUnit() : Weight(0), RegClassUnitSetsIdx(0) { Roots[0] = Roots[1] = 0; }
 
     ArrayRef<const CodeGenRegister*> getRoots() const {
       assert(!(Roots[1] && !Roots[0]) && "Invalid roots array");
@@ -462,6 +466,10 @@ namespace llvm {
 
     // Map RegisterClass index to the index of the RegUnitSet that contains the
     // class's units and any inferred RegUnit supersets.
+    //
+    // NOTE: This could grow beyond the number of register classes when we map
+    // register units to lists of unit sets. If the list of unit sets does not
+    // already exist for a register class, we create a new entry in this vector.
     std::vector<std::vector<unsigned> > RegClassUnitSets;
 
     // Add RC to *2RC maps.
@@ -615,6 +623,13 @@ namespace llvm {
       return RegUnitSets[Idx];
     }
 
+    // The number of pressure set lists may be larget than the number of
+    // register classes if some register units appeared in a list of sets that
+    // did not correspond to an existing register class.
+    unsigned getNumRegClassPressureSetLists() const {
+      return RegClassUnitSets.size();
+    }
+
     // Get a list of pressure set IDs for a register class. Liveness of a
     // register in this class impacts each pressure set in this list by the
     // weight of the register. An exact solution requires all registers in a
diff --git a/utils/TableGen/CodeGenSchedule.cpp b/utils/TableGen/CodeGenSchedule.cpp
index 63cc97a8c1..c653c49f25 100644
--- a/utils/TableGen/CodeGenSchedule.cpp
+++ b/utils/TableGen/CodeGenSchedule.cpp
@@ -16,10 +16,10 @@
 
 #include "CodeGenSchedule.h"
 #include "CodeGenTarget.h"
-#include "llvm/TableGen/Error.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Regex.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/TableGen/Error.h"
 
 using namespace llvm;
 
diff --git a/utils/TableGen/CodeGenSchedule.h b/utils/TableGen/CodeGenSchedule.h
index eed058971b..dc927e68ff 100644
--- a/utils/TableGen/CodeGenSchedule.h
+++ b/utils/TableGen/CodeGenSchedule.h
@@ -16,10 +16,10 @@
 #define CODEGEN_SCHEDULE_H
 
 #include "SetTheory.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TableGen/Record.h"
 
 namespace llvm {
 
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index bd55e697c5..a2289e2bba 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -17,11 +17,11 @@
 #include "CodeGenTarget.h"
 #include "CodeGenIntrinsics.h"
 #include "CodeGenSchedule.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
 #include <algorithm>
 using namespace llvm;
 
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index ddeecee36f..4a1c6d8fcb 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -17,10 +17,10 @@
 #ifndef CODEGEN_TARGET_H
 #define CODEGEN_TARGET_H
 
-#include "CodeGenRegisters.h"
 #include "CodeGenInstruction.h"
-#include "llvm/TableGen/Record.h"
+#include "CodeGenRegisters.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Record.h"
 #include <algorithm>
 
 namespace llvm {
diff --git a/utils/TableGen/DAGISelMatcher.cpp b/utils/TableGen/DAGISelMatcher.cpp
index bd77907a9b..d173cf006a 100644
--- a/utils/TableGen/DAGISelMatcher.cpp
+++ b/utils/TableGen/DAGISelMatcher.cpp
@@ -10,9 +10,9 @@
 #include "DAGISelMatcher.h"
 #include "CodeGenDAGPatterns.h"
 #include "CodeGenTarget.h"
-#include "llvm/TableGen/Record.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Record.h"
 using namespace llvm;
 
 void Matcher::anchor() { }
diff --git a/utils/TableGen/DAGISelMatcher.h b/utils/TableGen/DAGISelMatcher.h
index 7c6ce3babc..f978188aae 100644
--- a/utils/TableGen/DAGISelMatcher.h
+++ b/utils/TableGen/DAGISelMatcher.h
@@ -10,10 +10,10 @@
 #ifndef TBLGEN_DAGISELMATCHER_H
 #define TBLGEN_DAGISELMATCHER_H
 
-#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Support/Casting.h"
 
 namespace llvm {
diff --git a/utils/TableGen/DAGISelMatcherEmitter.cpp b/utils/TableGen/DAGISelMatcherEmitter.cpp
index 713f1743c1..4345708783 100644
--- a/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -13,12 +13,12 @@
 
 #include "DAGISelMatcher.h"
 #include "CodeGenDAGPatterns.h"
-#include "llvm/TableGen/Record.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/TableGen/Record.h"
 using namespace llvm;
 
 enum {
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index 573f55875e..38ffa30ec8 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -10,11 +10,11 @@
 #include "DAGISelMatcher.h"
 #include "CodeGenDAGPatterns.h"
 #include "CodeGenRegisters.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
 #include <utility>
 using namespace llvm;
 
diff --git a/utils/TableGen/DFAPacketizerEmitter.cpp b/utils/TableGen/DFAPacketizerEmitter.cpp
index 0ad25a5428..2549c47c33 100644
--- a/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -279,6 +279,7 @@ DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
 //
 //
 void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
+  static const std::string SentinelEntry = "{-1, -1}";
   DFA::StateSet::iterator SI = states.begin();
   // This table provides a map to the beginning of the transitions for State s
   // in DFAStateInputTable.
@@ -305,12 +306,17 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
     // If there are no valid transitions from this stage, we need a sentinel
     // transition.
     if (ValidTransitions == StateEntry[i]) {
-      OS << "{-1, -1},";
+      OS << SentinelEntry << ",";
       ++ValidTransitions;
     }
 
     OS << "\n";
   }
+
+  // Print out a sentinel entry at the end of the StateInputTable. This is
+  // needed to iterate over StateInputTable in DFAPacketizer::ReadTable()
+  OS << SentinelEntry << "\n";
+  
   OS << "};\n\n";
   OS << "const unsigned int " << TargetName << "DFAStateEntryTable[] = {\n";
 
@@ -319,6 +325,9 @@ void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName) {
   for (unsigned i = 0; i < states.size(); ++i)
     OS << StateEntry[i] << ", ";
 
+  // Print out the index to the sentinel entry in StateInputTable
+  OS << ValidTransitions << ", ";
+
   OS << "\n};\n";
   OS << "} // namespace\n";
 
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index 5cabcadabd..b2dc878880 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -15,8 +15,6 @@
 #define DEBUG_TYPE "decoder-emitter"
 
 #include "CodeGenTarget.h"
-#include "llvm/TableGen/Error.h"
-#include "llvm/TableGen/Record.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
@@ -28,11 +26,12 @@
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
-
-#include <vector>
 #include <map>
 #include <string>
+#include <vector>
 
 using namespace llvm;
 
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 48d41d7b96..d6020a8461 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -16,8 +16,8 @@
 #include "CodeGenDAGPatterns.h"
 #include "CodeGenSchedule.h"
 #include "CodeGenTarget.h"
-#include "TableGenBackends.h"
 #include "SequenceToOffsetTable.h"
+#include "TableGenBackends.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
@@ -271,7 +271,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
 
   std::string ClassName = TargetName + "GenInstrInfo";
   OS << "namespace llvm {\n";
-  OS << "struct " << ClassName << " : public TargetInstrInfoImpl {\n"
+  OS << "struct " << ClassName << " : public TargetInstrInfo {\n"
      << "  explicit " << ClassName << "(int SO = -1, int DO = -1);\n"
      << "};\n";
   OS << "} // End llvm namespace \n";
@@ -286,7 +286,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
   OS << "extern const unsigned " << TargetName << "InstrNameIndices[];\n";
   OS << "extern const char " << TargetName << "InstrNameData[];\n";
   OS << ClassName << "::" << ClassName << "(int SO, int DO)\n"
-     << "  : TargetInstrInfoImpl(SO, DO) {\n"
+     << "  : TargetInstrInfo(SO, DO) {\n"
      << "  InitMCInstrInfo(" << TargetName << "Insts, "
      << TargetName << "InstrNameIndices, " << TargetName << "InstrNameData, "
      << NumberedInstructions.size() << ");\n}\n";
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index fe55242930..967c58b3d2 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -511,10 +511,10 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
   OS << "// Add parameter attributes that are not common to all intrinsics.\n";
   OS << "#ifdef GET_INTRINSIC_ATTRIBUTES\n";
   if (TargetOnly)
-    OS << "static AttrListPtr getAttributes(LLVMContext &C, " << TargetPrefix
+    OS << "static AttributeSet getAttributes(LLVMContext &C, " << TargetPrefix
        << "Intrinsic::ID id) {\n";
   else
-    OS << "AttrListPtr Intrinsic::getAttributes(LLVMContext &C, ID id) {\n";
+    OS << "AttributeSet Intrinsic::getAttributes(LLVMContext &C, ID id) {\n";
 
   // Compute the maximum number of attribute arguments and the map
   typedef std::map<const CodeGenIntrinsic*, unsigned,
@@ -608,20 +608,20 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
         break;
       }
       OS << "      AWI[" << numAttrs++ << "] = AttributeWithIndex::get(C, "
-         << "AttrListPtr::FunctionIndex, AttrVec);\n";
+         << "AttributeSet::FunctionIndex, AttrVec);\n";
     }
 
     if (numAttrs) {
       OS << "      NumAttrs = " << numAttrs << ";\n";
       OS << "      break;\n";
     } else {
-      OS << "      return AttrListPtr();\n";
+      OS << "      return AttributeSet();\n";
     }
   }
   
   OS << "    }\n";
   OS << "  }\n";
-  OS << "  return AttrListPtr::get(C, ArrayRef<AttributeWithIndex>(AWI, "
+  OS << "  return AttributeSet::get(C, ArrayRef<AttributeWithIndex>(AWI, "
              "NumAttrs));\n";
   OS << "}\n";
   OS << "#endif // GET_INTRINSIC_ATTRIBUTES\n\n";
diff --git a/utils/TableGen/OptParserEmitter.cpp b/utils/TableGen/OptParserEmitter.cpp
new file mode 100644
index 0000000000..5dab9e63a0
--- /dev/null
+++ b/utils/TableGen/OptParserEmitter.cpp
@@ -0,0 +1,267 @@
+//===- OptParserEmitter.cpp - Table Driven Command Line Parsing -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+
+#include <map>
+
+using namespace llvm;
+
+static int StrCmpOptionName(const char *A, const char *B) {
+  char a = *A, b = *B;
+  while (a == b) {
+    if (a == '\0')
+      return 0;
+
+    a = *++A;
+    b = *++B;
+  }
+
+  if (a == '\0') // A is a prefix of B.
+    return 1;
+  if (b == '\0') // B is a prefix of A.
+    return -1;
+
+  // Otherwise lexicographic.
+  return (a < b) ? -1 : 1;
+}
+
+static int CompareOptionRecords(const void *Av, const void *Bv) {
+  const Record *A = *(const Record*const*) Av;
+  const Record *B = *(const Record*const*) Bv;
+
+  // Sentinel options precede all others and are only ordered by precedence.
+  bool ASent = A->getValueAsDef("Kind")->getValueAsBit("Sentinel");
+  bool BSent = B->getValueAsDef("Kind")->getValueAsBit("Sentinel");
+  if (ASent != BSent)
+    return ASent ? -1 : 1;
+
+  // Compare options by name, unless they are sentinels.
+  if (!ASent)
+    if (int Cmp = StrCmpOptionName(A->getValueAsString("Name").c_str(),
+                                   B->getValueAsString("Name").c_str()))
+    return Cmp;
+
+  if (!ASent) {
+    std::vector<std::string> APrefixes = A->getValueAsListOfStrings("Prefixes");
+    std::vector<std::string> BPrefixes = B->getValueAsListOfStrings("Prefixes");
+
+    for (std::vector<std::string>::const_iterator APre = APrefixes.begin(),
+                                                  AEPre = APrefixes.end(),
+                                                  BPre = BPrefixes.begin(),
+                                                  BEPre = BPrefixes.end();
+                                                  APre != AEPre &&
+                                                  BPre != BEPre;
+                                                  ++APre, ++BPre) {
+      if (int Cmp = StrCmpOptionName(APre->c_str(), BPre->c_str()))
+        return Cmp;
+    }
+  }
+
+  // Then by the kind precedence;
+  int APrec = A->getValueAsDef("Kind")->getValueAsInt("Precedence");
+  int BPrec = B->getValueAsDef("Kind")->getValueAsInt("Precedence");
+  if (APrec == BPrec &&
+      A->getValueAsListOfStrings("Prefixes") ==
+      B->getValueAsListOfStrings("Prefixes")) {
+    PrintError(A->getLoc(), Twine("Option is equivilent to"));
+    PrintError(B->getLoc(), Twine("Other defined here"));
+    PrintFatalError("Equivalent Options found.");
+  }
+  return APrec < BPrec ? -1 : 1;
+}
+
+static const std::string getOptionName(const Record &R) {
+  // Use the record name unless EnumName is defined.
+  if (isa<UnsetInit>(R.getValueInit("EnumName")))
+    return R.getName();
+
+  return R.getValueAsString("EnumName");
+}
+
+static raw_ostream &write_cstring(raw_ostream &OS, llvm::StringRef Str) {
+  OS << '"';
+  OS.write_escaped(Str);
+  OS << '"';
+  return OS;
+}
+
+/// OptParserEmitter - This tablegen backend takes an input .td file
+/// describing a list of options and emits a data structure for parsing and
+/// working with those options when given an input command line.
+namespace llvm {
+void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
+  // Get the option groups and options.
+  const std::vector<Record*> &Groups =
+    Records.getAllDerivedDefinitions("OptionGroup");
+  std::vector<Record*> Opts = Records.getAllDerivedDefinitions("Option");
+
+  emitSourceFileHeader("Option Parsing Definitions", OS);
+
+  array_pod_sort(Opts.begin(), Opts.end(), CompareOptionRecords);
+  // Generate prefix groups.
+  typedef SmallVector<SmallString<2>, 2> PrefixKeyT;
+  typedef std::map<PrefixKeyT, std::string> PrefixesT;
+  PrefixesT Prefixes;
+  Prefixes.insert(std::make_pair(PrefixKeyT(), "prefix_0"));
+  unsigned CurPrefix = 0;
+  for (unsigned i = 0, e = Opts.size(); i != e; ++i) {
+    const Record &R = *Opts[i];
+    std::vector<std::string> prf = R.getValueAsListOfStrings("Prefixes");
+    PrefixKeyT prfkey(prf.begin(), prf.end());
+    unsigned NewPrefix = CurPrefix + 1;
+    if (Prefixes.insert(std::make_pair(prfkey, (Twine("prefix_") +
+                                              Twine(NewPrefix)).str())).second)
+      CurPrefix = NewPrefix;
+  }
+
+  // Dump prefixes.
+
+  OS << "/////////\n";
+  OS << "// Prefixes\n\n";
+  OS << "#ifdef PREFIX\n";
+  OS << "#define COMMA ,\n";
+  for (PrefixesT::const_iterator I = Prefixes.begin(), E = Prefixes.end();
+                                  I != E; ++I) {
+    OS << "PREFIX(";
+
+    // Prefix name.
+    OS << I->second;
+
+    // Prefix values.
+    OS << ", {";
+    for (PrefixKeyT::const_iterator PI = I->first.begin(),
+                                    PE = I->first.end(); PI != PE; ++PI) {
+      OS << "\"" << *PI << "\" COMMA ";
+    }
+    OS << "0})\n";
+  }
+  OS << "#undef COMMA\n";
+  OS << "#endif\n\n";
+
+  OS << "/////////\n";
+  OS << "// Groups\n\n";
+  OS << "#ifdef OPTION\n";
+  for (unsigned i = 0, e = Groups.size(); i != e; ++i) {
+    const Record &R = *Groups[i];
+
+    // Start a single option entry.
+    OS << "OPTION(";
+
+    // The option prefix;
+    OS << "0";
+
+    // The option string.
+    OS << ", \"" << R.getValueAsString("Name") << '"';
+
+    // The option identifier name.
+    OS  << ", "<< getOptionName(R);
+
+    // The option kind.
+    OS << ", Group";
+
+    // The containing option group (if any).
+    OS << ", ";
+    if (const DefInit *DI = dyn_cast<DefInit>(R.getValueInit("Group")))
+      OS << getOptionName(*DI->getDef());
+    else
+      OS << "INVALID";
+
+    // The other option arguments (unused for groups).
+    OS << ", INVALID, 0, 0";
+
+    // The option help text.
+    if (!isa<UnsetInit>(R.getValueInit("HelpText"))) {
+      OS << ",\n";
+      OS << "       ";
+      write_cstring(OS, R.getValueAsString("HelpText"));
+    } else
+      OS << ", 0";
+
+    // The option meta-variable name (unused).
+    OS << ", 0)\n";
+  }
+  OS << "\n";
+
+  OS << "//////////\n";
+  OS << "// Options\n\n";
+  for (unsigned i = 0, e = Opts.size(); i != e; ++i) {
+    const Record &R = *Opts[i];
+
+    // Start a single option entry.
+    OS << "OPTION(";
+
+    // The option prefix;
+    std::vector<std::string> prf = R.getValueAsListOfStrings("Prefixes");
+    OS << Prefixes[PrefixKeyT(prf.begin(), prf.end())] << ", ";
+
+    // The option string.
+    write_cstring(OS, R.getValueAsString("Name"));
+
+    // The option identifier name.
+    OS  << ", "<< getOptionName(R);
+
+    // The option kind.
+    OS << ", " << R.getValueAsDef("Kind")->getValueAsString("Name");
+
+    // The containing option group (if any).
+    OS << ", ";
+    if (const DefInit *DI = dyn_cast<DefInit>(R.getValueInit("Group")))
+      OS << getOptionName(*DI->getDef());
+    else
+      OS << "INVALID";
+
+    // The option alias (if any).
+    OS << ", ";
+    if (const DefInit *DI = dyn_cast<DefInit>(R.getValueInit("Alias")))
+      OS << getOptionName(*DI->getDef());
+    else
+      OS << "INVALID";
+
+    // The option flags.
+    const ListInit *LI = R.getValueAsListInit("Flags");
+    if (LI->empty()) {
+      OS << ", 0";
+    } else {
+      OS << ", ";
+      for (unsigned i = 0, e = LI->size(); i != e; ++i) {
+        if (i)
+          OS << " | ";
+        OS << cast<DefInit>(LI->getElement(i))->getDef()->getName();
+      }
+    }
+
+    // The option parameter field.
+    OS << ", " << R.getValueAsInt("NumArgs");
+
+    // The option help text.
+    if (!isa<UnsetInit>(R.getValueInit("HelpText"))) {
+      OS << ",\n";
+      OS << "       ";
+      write_cstring(OS, R.getValueAsString("HelpText"));
+    } else
+      OS << ", 0";
+
+    // The option meta-variable name.
+    OS << ", ";
+    if (!isa<UnsetInit>(R.getValueInit("MetaVarName")))
+      write_cstring(OS, R.getValueAsString("MetaVarName"));
+    else
+      OS << "0";
+
+    OS << ")\n";
+  }
+  OS << "#endif\n";
+}
+} // end namespace llvm
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 95b6267238..66adb61a32 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -185,6 +185,36 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
      << "  return RCWeightTable[RC->getID()];\n"
      << "}\n\n";
 
+  // Reasonable targets (not ARMv7) have unit weight for all units, so don't
+  // bother generating a table.
+  bool RegUnitsHaveUnitWeight = true;
+  for (unsigned UnitIdx = 0, UnitEnd = RegBank.getNumNativeRegUnits();
+       UnitIdx < UnitEnd; ++UnitIdx) {
+    if (RegBank.getRegUnit(UnitIdx).Weight > 1)
+      RegUnitsHaveUnitWeight = false;
+  }
+  OS << "/// Get the weight in units of pressure for this register unit.\n"
+     << "unsigned " << ClassName << "::\n"
+     << "getRegUnitWeight(unsigned RegUnit) const {\n"
+     << "  assert(RegUnit < " << RegBank.getNumNativeRegUnits()
+     << " && \"invalid register unit\");\n";
+  if (!RegUnitsHaveUnitWeight) {
+    OS << "  static const uint8_t RUWeightTable[] = {\n    ";
+    for (unsigned UnitIdx = 0, UnitEnd = RegBank.getNumNativeRegUnits();
+         UnitIdx < UnitEnd; ++UnitIdx) {
+      const RegUnit &RU = RegBank.getRegUnit(UnitIdx);
+      assert(RU.Weight < 256 && "RegUnit too heavy");
+      OS << RU.Weight << ", ";
+    }
+    OS << "0 };\n"
+       << "  return RUWeightTable[RegUnit];\n";
+  }
+  else {
+    OS << "  // All register units have unit weight.\n"
+       << "  return 1;\n";
+  }
+  OS << "}\n\n";
+
   OS << "\n"
      << "// Get the number of dimensions of register pressure.\n"
      << "unsigned " << ClassName << "::getNumRegPressureSets() const {\n"
@@ -215,14 +245,13 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
      << "  return PressureLimitTable[Idx];\n"
      << "}\n\n";
 
-  OS << "/// Get the dimensions of register pressure "
-     << "impacted by this register class.\n"
-     << "/// Returns a -1 terminated array of pressure set IDs\n"
-     << "const int* " << ClassName << "::\n"
-     << "getRegClassPressureSets(const TargetRegisterClass *RC) const {\n"
-     << "  static const int RCSetsTable[] = {\n    ";
-  std::vector<unsigned> RCSetStarts(NumRCs);
-  for (unsigned i = 0, StartIdx = 0, e = NumRCs; i != e; ++i) {
+  // This table may be larger than NumRCs if some register units needed a list
+  // of unit sets that did not correspond to a register class.
+  unsigned NumRCUnitSets = RegBank.getNumRegClassPressureSetLists();
+  OS << "/// Table of pressure sets per register class or unit.\n"
+     << "static const int RCSetsTable[] = {\n    ";
+  std::vector<unsigned> RCSetStarts(NumRCUnitSets);
+  for (unsigned i = 0, StartIdx = 0, e = NumRCUnitSets; i != e; ++i) {
     RCSetStarts[i] = StartIdx;
     ArrayRef<unsigned> PSetIDs = RegBank.getRCPressureSetIDs(i);
     for (ArrayRef<unsigned>::iterator PSetI = PSetIDs.begin(),
@@ -230,10 +259,26 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
       OS << *PSetI << ",  ";
       ++StartIdx;
     }
-    OS << "-1,  \t// " << RegBank.getRegClasses()[i]->getName() << "\n    ";
+    OS << "-1,  \t// #" << RCSetStarts[i] << " ";
+    if (i < NumRCs)
+      OS << RegBank.getRegClasses()[i]->getName();
+    else {
+      OS << "inferred";
+      for (ArrayRef<unsigned>::iterator PSetI = PSetIDs.begin(),
+             PSetE = PSetIDs.end(); PSetI != PSetE; ++PSetI) {
+        OS << "~" << RegBank.getRegPressureSet(*PSetI).Name;
+      }
+    }
+    OS << "\n    ";
     ++StartIdx;
   }
-  OS << "-1 };\n";
+  OS << "-1 };\n\n";
+
+  OS << "/// Get the dimensions of register pressure impacted by this "
+     << "register class.\n"
+     << "/// Returns a -1 terminated array of pressure set IDs\n"
+     << "const int* " << ClassName << "::\n"
+     << "getRegClassPressureSets(const TargetRegisterClass *RC) const {\n";
   OS << "  static const unsigned RCSetStartTable[] = {\n    ";
   for (unsigned i = 0, e = NumRCs; i != e; ++i) {
     OS << RCSetStarts[i] << ",";
@@ -242,6 +287,23 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
      << "  unsigned SetListStart = RCSetStartTable[RC->getID()];\n"
      << "  return &RCSetsTable[SetListStart];\n"
      << "}\n\n";
+
+  OS << "/// Get the dimensions of register pressure impacted by this "
+     << "register unit.\n"
+     << "/// Returns a -1 terminated array of pressure set IDs\n"
+     << "const int* " << ClassName << "::\n"
+     << "getRegUnitPressureSets(unsigned RegUnit) const {\n"
+     << "  assert(RegUnit < " << RegBank.getNumNativeRegUnits()
+     << " && \"invalid register unit\");\n";
+  OS << "  static const unsigned RUSetStartTable[] = {\n    ";
+  for (unsigned UnitIdx = 0, UnitEnd = RegBank.getNumNativeRegUnits();
+       UnitIdx < UnitEnd; ++UnitIdx) {
+    OS << RCSetStarts[RegBank.getRegUnit(UnitIdx).RegClassUnitSetsIdx] << ",";
+  }
+  OS << "0 };\n"
+     << "  unsigned SetListStart = RUSetStartTable[RegUnit];\n"
+     << "  return &RCSetsTable[SetListStart];\n"
+     << "}\n\n";
 }
 
 void
@@ -729,7 +791,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
   const std::string &TargetName = Target.getName();
 
   // Emit the shared table of differential lists.
-  OS << "extern const uint16_t " << TargetName << "RegDiffLists[] = {\n";
+  OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[] = {\n";
   DiffSeqs.emit(OS, printDiff16);
   OS << "};\n\n";
 
@@ -907,11 +969,13 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
   }
   OS << "  virtual const RegClassWeight &getRegClassWeight("
      << "const TargetRegisterClass *RC) const;\n"
+     << "  virtual unsigned getRegUnitWeight(unsigned RegUnit) const;\n"
      << "  virtual unsigned getNumRegPressureSets() const;\n"
      << "  virtual const char *getRegPressureSetName(unsigned Idx) const;\n"
      << "  virtual unsigned getRegPressureSetLimit(unsigned Idx) const;\n"
      << "  virtual const int *getRegClassPressureSets("
      << "const TargetRegisterClass *RC) const;\n"
+     << "  virtual const int *getRegUnitPressureSets(unsigned RegUnit) const;\n"
      << "};\n\n";
 
   ArrayRef<CodeGenRegisterClass*> RegisterClasses = RegBank.getRegClasses();
@@ -1074,12 +1138,12 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
         OS << "\nstatic inline unsigned " << RC.getName()
            << "AltOrderSelect(const MachineFunction &MF) {"
            << RC.AltOrderSelect << "}\n\n"
-           << "static ArrayRef<uint16_t> " << RC.getName()
+           << "static ArrayRef<MCPhysReg> " << RC.getName()
            << "GetRawAllocationOrder(const MachineFunction &MF) {\n";
         for (unsigned oi = 1 , oe = RC.getNumOrders(); oi != oe; ++oi) {
           ArrayRef<Record*> Elems = RC.getOrder(oi);
           if (!Elems.empty()) {
-            OS << "  static const uint16_t AltOrder" << oi << "[] = {";
+            OS << "  static const MCPhysReg AltOrder" << oi << "[] = {";
             for (unsigned elem = 0; elem != Elems.size(); ++elem)
               OS << (elem ? ", " : " ") << getQualifiedName(Elems[elem]);
             OS << " };\n";
@@ -1087,11 +1151,11 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
         }
         OS << "  const MCRegisterClass &MCR = " << Target.getName()
            << "MCRegisterClasses[" << RC.getQualifiedName() + "RegClassID];\n"
-           << "  const ArrayRef<uint16_t> Order[] = {\n"
+           << "  const ArrayRef<MCPhysReg> Order[] = {\n"
            << "    makeArrayRef(MCR.begin(), MCR.getNumRegs()";
         for (unsigned oi = 1, oe = RC.getNumOrders(); oi != oe; ++oi)
           if (RC.getOrder(oi).empty())
-            OS << "),\n    ArrayRef<uint16_t>(";
+            OS << "),\n    ArrayRef<MCPhysReg>(";
           else
             OS << "),\n    makeArrayRef(AltOrder" << oi;
         OS << ")\n  };\n  const unsigned Select = " << RC.getName()
@@ -1194,7 +1258,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
 
   // Emit the constructor of the class...
   OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[];\n";
-  OS << "extern const uint16_t " << TargetName << "RegDiffLists[];\n";
+  OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[];\n";
   OS << "extern const char " << TargetName << "RegStrings[];\n";
   OS << "extern const uint16_t " << TargetName << "RegUnitRoots[][2];\n";
   OS << "extern const uint16_t " << TargetName << "SubRegIdxLists[];\n";
@@ -1232,7 +1296,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
     assert(Regs && "Cannot expand CalleeSavedRegs instance");
 
     // Emit the *_SaveList list of callee-saved registers.
-    OS << "static const uint16_t " << CSRSet->getName()
+    OS << "static const MCPhysReg " << CSRSet->getName()
        << "_SaveList[] = { ";
     for (unsigned r = 0, re = Regs->size(); r != re; ++r)
       OS << getQualifiedName((*Regs)[r]) << ", ";
diff --git a/utils/TableGen/SequenceToOffsetTable.h b/utils/TableGen/SequenceToOffsetTable.h
index d4db152a96..fcda233dc9 100644
--- a/utils/TableGen/SequenceToOffsetTable.h
+++ b/utils/TableGen/SequenceToOffsetTable.h
@@ -17,11 +17,11 @@
 #define TBLGEN_SEQUENCE_TO_OFFSET_TABLE_H
 
 #include "llvm/Support/raw_ostream.h"
-#include <functional>
 #include <algorithm>
-#include <vector>
 #include <cassert>
 #include <cctype>
+#include <functional>
+#include <vector>
 
 namespace llvm {
 
diff --git a/utils/TableGen/SetTheory.cpp b/utils/TableGen/SetTheory.cpp
index 0dd9853843..3e5c38cf0a 100644
--- a/utils/TableGen/SetTheory.cpp
+++ b/utils/TableGen/SetTheory.cpp
@@ -13,9 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "SetTheory.h"
+#include "llvm/Support/Format.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
-#include "llvm/Support/Format.h"
 
 using namespace llvm;
 
diff --git a/utils/TableGen/SetTheory.h b/utils/TableGen/SetTheory.h
index 122372ab33..5baed79fb7 100644
--- a/utils/TableGen/SetTheory.h
+++ b/utils/TableGen/SetTheory.h
@@ -47,8 +47,8 @@
 #ifndef SETTHEORY_H
 #define SETTHEORY_H
 
-#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/Support/SourceMgr.h"
 #include <map>
 #include <vector>
diff --git a/utils/TableGen/StringToOffsetTable.h b/utils/TableGen/StringToOffsetTable.h
index a098d7d744..d94d3a2668 100644
--- a/utils/TableGen/StringToOffsetTable.h
+++ b/utils/TableGen/StringToOffsetTable.h
@@ -11,8 +11,8 @@
 #define TBLGEN_STRING_TO_OFFSET_TABLE_H
 
 #include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cctype>
 
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index f1a06bb528..3b7d006fd1 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -15,14 +15,14 @@
 
 #include "CodeGenTarget.h"
 #include "CodeGenSchedule.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
 #include <algorithm>
 #include <map>
 #include <string>
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 49efe7ed73..06be55bde7 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "TableGenBackends.h" // Declares all backends.
-
 #include "SetTheory.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/PrettyStackTrace.h"
@@ -41,7 +40,8 @@ enum ActionType {
   GenTgtIntrinsic,
   GenEDInfo,
   PrintEnums,
-  PrintSets
+  PrintSets,
+  GenOptParserDefs
 };
 
 namespace {
@@ -83,6 +83,8 @@ namespace {
                                "Print enum values for a class"),
                     clEnumValN(PrintSets, "print-sets",
                                "Print expanded sets for testing DAG exprs"),
+                    clEnumValN(GenOptParserDefs, "gen-opt-parser-defs",
+                               "Generate option definitions"),
                     clEnumValEnd));
 
   cl::opt<std::string>
@@ -139,6 +141,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   case GenEDInfo:
     EmitEnhancedDisassemblerInfo(Records, OS);
     break;
+  case GenOptParserDefs:
+    EmitOptParser(Records, OS);
+    break;
   case PrintEnums:
   {
     std::vector<Record*> Recs = Records.getAllDerivedDefinitions(Class);
diff --git a/utils/TableGen/TableGenBackends.h b/utils/TableGen/TableGenBackends.h
index f0d25d8a2c..4c0d8dcb4f 100644
--- a/utils/TableGen/TableGenBackends.h
+++ b/utils/TableGen/TableGenBackends.h
@@ -75,5 +75,6 @@ void EmitPseudoLowering(RecordKeeper &RK, raw_ostream &OS);
 void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS);
 void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS);
 void EmitMapTable(RecordKeeper &RK, raw_ostream &OS);
+void EmitOptParser(RecordKeeper &RK, raw_ostream &OS);
 
 } // End llvm namespace
diff --git a/utils/TableGen/X86DisassemblerShared.h b/utils/TableGen/X86DisassemblerShared.h
index c13a0cc467..3ff922b822 100644
--- a/utils/TableGen/X86DisassemblerShared.h
+++ b/utils/TableGen/X86DisassemblerShared.h
@@ -10,8 +10,8 @@
 #ifndef X86DISASSEMBLERSHARED_H
 #define X86DISASSEMBLERSHARED_H
 
-#include <string>
 #include <string.h>
+#include <string>
 
 #define INSTRUCTION_SPECIFIER_FIELDS       \
   struct OperandSpecifier operands[X86_MAX_OPERANDS]; \
diff --git a/utils/TableGen/X86DisassemblerTables.cpp b/utils/TableGen/X86DisassemblerTables.cpp
index 468a1f81c7..40a0c1b260 100644
--- a/utils/TableGen/X86DisassemblerTables.cpp
+++ b/utils/TableGen/X86DisassemblerTables.cpp
@@ -14,13 +14,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "X86DisassemblerShared.h"
 #include "X86DisassemblerTables.h"
-
-#include "llvm/TableGen/TableGenBackend.h"
+#include "X86DisassemblerShared.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
+#include "llvm/TableGen/TableGenBackend.h"
 #include <map>
 
 using namespace llvm;
diff --git a/utils/TableGen/X86DisassemblerTables.h b/utils/TableGen/X86DisassemblerTables.h
index ea006c05b9..01aeaaf0bf 100644
--- a/utils/TableGen/X86DisassemblerTables.h
+++ b/utils/TableGen/X86DisassemblerTables.h
@@ -19,9 +19,7 @@
 
 #include "X86DisassemblerShared.h"
 #include "X86ModRMFilters.h"
-
 #include "llvm/Support/raw_ostream.h"
-
 #include <vector>
 
 namespace llvm {
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index d6ed2fe2c6..b99a6eb87e 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -14,12 +14,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "X86DisassemblerShared.h"
 #include "X86RecognizableInstr.h"
+#include "X86DisassemblerShared.h"
 #include "X86ModRMFilters.h"
-
 #include "llvm/Support/ErrorHandling.h"
-
 #include <string>
 
 using namespace llvm;
diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h
index 9feb3c3c7d..9ec36a39df 100644
--- a/utils/TableGen/X86RecognizableInstr.h
+++ b/utils/TableGen/X86RecognizableInstr.h
@@ -17,13 +17,11 @@
 #ifndef X86RECOGNIZABLEINSTR_H
 #define X86RECOGNIZABLEINSTR_H
 
-#include "X86DisassemblerTables.h"
-
 #include "CodeGenTarget.h"
-
-#include "llvm/TableGen/Record.h"
-#include "llvm/Support/DataTypes.h"
+#include "X86DisassemblerTables.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/TableGen/Record.h"
 
 namespace llvm {
 
diff --git a/utils/emacs/llvm-mode.el b/utils/emacs/llvm-mode.el
index 3780624b5a..25d9742186 100644
--- a/utils/emacs/llvm-mode.el
+++ b/utils/emacs/llvm-mode.el
@@ -5,7 +5,6 @@
 ;; Create mode-specific tables.
 (defvar llvm-mode-syntax-table nil
   "Syntax table used while in LLVM mode.")
-
 (defvar llvm-font-lock-keywords
   (list
    ;; Comments
@@ -31,12 +30,14 @@
                     "define" "global" "constant" "const" "internal" "linkonce" "linkonce_odr"
                     "weak" "weak_odr" "appending" "uninitialized" "implementation" "..."
                     "null" "undef" "to" "except" "not" "target" "endian" "little" "big"
-                    "pointersize" "deplibs" "volatile" "fastcc" "coldcc" "cc") 'words) . font-lock-keyword-face)
+                    "pointersize" "volatile" "fastcc" "coldcc" "cc") 'words) . font-lock-keyword-face)
    ;; Arithmetic and Logical Operators
    `(,(regexp-opt '("add" "sub" "mul" "div" "rem" "and" "or" "xor"
                     "setne" "seteq" "setlt" "setgt" "setle" "setge") 'words) . font-lock-keyword-face)
+   ;; Floating-point operators
+   `(,(regexp-opt '("fadd" "fsub" "fmul" "fdiv" "frem") 'words) . font-lock-keyword-face)
    ;; Special instructions
-   `(,(regexp-opt '("phi" "tail" "call" "cast" "select" "to" "shl" "shr" "vaarg" "vanext") 'words) . font-lock-keyword-face)
+   `(,(regexp-opt '("phi" "tail" "call" "cast" "select" "to" "shl" "shr" "fcmp" "icmp" "vaarg" "vanext") 'words) . font-lock-keyword-face)
    ;; Control instructions
    `(,(regexp-opt '("ret" "br" "switch" "invoke" "unwind" "unreachable") 'words) . font-lock-keyword-face)
    ;; Memory operators
@@ -111,7 +112,7 @@
   (interactive)
   (kill-all-local-variables)
   (use-local-map llvm-mode-map)         ; Provides the local keymap.
-  (setq major-mode 'llvm-mode)          
+  (setq major-mode 'llvm-mode)
 
   (make-local-variable 'font-lock-defaults)
   (setq major-mode 'llvm-mode           ; This is how describe-mode
diff --git a/utils/lit/lit/ExampleTests/ManyTests/lit.local.cfg b/utils/lit/lit/ExampleTests/ManyTests/lit.local.cfg
new file mode 100644
index 0000000000..6cc47522b1
--- /dev/null
+++ b/utils/lit/lit/ExampleTests/ManyTests/lit.local.cfg
@@ -0,0 +1,23 @@
+# -*- Python -*-
+
+Test = lit.Test
+
+class ManyTests(object):
+    def __init__(self, N=10000):
+        self.N = N
+
+    def getTestsInDirectory(self, testSuite, path_in_suite,
+                            litConfig, localConfig):
+        for i in range(self.N):
+            test_name = 'test-%04d' % (i,)
+            yield Test.Test(testSuite, path_in_suite + (test_name,),
+                            localConfig)
+
+    def execute(self, test, litConfig):
+        # Do a "non-trivial" amount of Python work.
+        sum = 0
+        for i in range(10000):
+            sum += i
+        return Test.PASS,''
+
+config.test_format = ManyTests()
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index e339652f83..652e8b499c 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -241,11 +241,16 @@ def executeShCmd(cmd, cfg, cwd, results):
     return exitCode
 
 def executeScriptInternal(test, litConfig, tmpBase, commands, cwd):
-    ln = ' &&\n'.join(commands)
-    try:
-        cmd = ShUtil.ShParser(ln, litConfig.isWindows).parse()
-    except:
-        return (Test.FAIL, "shell parser error on: %r" % ln)
+    cmds = []
+    for ln in commands:
+        try:
+            cmds.append(ShUtil.ShParser(ln, litConfig.isWindows).parse())
+        except:
+            return (Test.FAIL, "shell parser error on: %r" % ln)
+
+    cmd = cmds[0]
+    for c in cmds[1:]:
+        cmd = ShUtil.Seq(cmd, '&&', c)
 
     results = []
     try:
@@ -352,7 +357,7 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
     if isWin32CMDEXE:
         f.write('\nif %ERRORLEVEL% NEQ 0 EXIT\n'.join(commands))
     else:
-        f.write(' &&\n'.join(commands))
+        f.write('{ ' + '; } &&\n{ '.join(commands) + '; }')
     f.write('\n')
     f.close()
 
diff --git a/utils/lit/lit/__init__.py b/utils/lit/lit/__init__.py
index f3fbb1cd82..3e61bbd770 100644
--- a/utils/lit/lit/__init__.py
+++ b/utils/lit/lit/__init__.py
@@ -4,7 +4,7 @@ from main import main
 
 __author__ = 'Daniel Dunbar'
 __email__ = 'daniel@zuster.org'
-__versioninfo__ = (0, 2, 0)
+__versioninfo__ = (0, 3, 0)
 __version__ = '.'.join(map(str, __versioninfo__)) + 'dev'
 
 __all__ = []
diff --git a/utils/obj2yaml/coff2yaml.cpp b/utils/obj2yaml/coff2yaml.cpp
index c9a71591ef..f0241d931e 100644
--- a/utils/obj2yaml/coff2yaml.cpp
+++ b/utils/obj2yaml/coff2yaml.cpp
@@ -8,7 +8,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "obj2yaml.h"
-
 #include "llvm/Object/COFF.h"
 
 
diff --git a/utils/obj2yaml/obj2yaml.cpp b/utils/obj2yaml/obj2yaml.cpp
index ff253fa131..bdc461a947 100644
--- a/utils/obj2yaml/obj2yaml.cpp
+++ b/utils/obj2yaml/obj2yaml.cpp
@@ -8,17 +8,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "obj2yaml.h"
-
 #include "llvm/ADT/OwningPtr.h"
-
+#include "llvm/Object/Archive.h"
+#include "llvm/Object/COFF.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/Signals.h"
 
-#include "llvm/Object/Archive.h"
-#include "llvm/Object/COFF.h"
-
 const char endl = '\n';
 
 namespace yaml {  // generic yaml-writing specific routines
diff --git a/utils/obj2yaml/obj2yaml.h b/utils/obj2yaml/obj2yaml.h
index 2a23b49682..0bc376a6db 100644
--- a/utils/obj2yaml/obj2yaml.h
+++ b/utils/obj2yaml/obj2yaml.h
@@ -14,10 +14,9 @@
 #define LLVM_UTILS_OBJ2YAML_H
 
 #include "llvm/ADT/ArrayRef.h"
-
-#include "llvm/Support/system_error.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
 
 namespace yaml {  // routines for writing YAML
 // Write a hex stream:
diff --git a/utils/sort_includes.py b/utils/sort_includes.py
new file mode 100755
index 0000000000..8ff7d059c4
--- /dev/null
+++ b/utils/sort_includes.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+
+"""Script to sort the top-most block of #include lines.
+
+Assumes the LLVM coding conventions.
+
+Currently, this script only bothers sorting the llvm/... headers. Patches
+welcome for more functionality, and sorting other header groups.
+"""
+
+import argparse
+import os
+
+def sort_includes(f):
+  """Sort the #include lines of a specific file."""
+
+  # Skip files which are under INPUTS trees or test trees.
+  if 'INPUTS/' in f.name or 'test/' in f.name:
+    return
+
+  ext = os.path.splitext(f.name)[1]
+  if ext not in ['.cpp', '.c', '.h', '.inc', '.def']:
+    return
+
+  lines = f.readlines()
+  look_for_api_header = ext in ['.cpp', '.c']
+  found_headers = False
+  headers_begin = 0
+  headers_end = 0
+  api_headers = []
+  local_headers = []
+  project_headers = []
+  system_headers = []
+  for (i, l) in enumerate(lines):
+    if l.strip() == '':
+      continue
+    if l.startswith('#include'):
+      if not found_headers:
+        headers_begin = i
+        found_headers = True
+      headers_end = i
+      header = l[len('#include'):].lstrip()
+      if look_for_api_header and header.startswith('"'):
+        api_headers.append(header)
+        look_for_api_header = False
+        continue
+      if header.startswith('<') or header.startswith('"gtest/'):
+        system_headers.append(header)
+        continue
+      if (header.startswith('"llvm/') or header.startswith('"llvm-c/') or
+          header.startswith('"clang/') or header.startswith('"clang-c/')):
+        project_headers.append(header)
+        continue
+      local_headers.append(header)
+      continue
+
+    # Only allow comments and #defines prior to any includes. If either are
+    # mixed with includes, the order might be sensitive.
+    if found_headers:
+      break
+    if l.startswith('//') or l.startswith('#define') or l.startswith('#ifndef'):
+      continue
+    break
+  if not found_headers:
+    return
+
+  local_headers.sort()
+  project_headers.sort()
+  system_headers.sort()
+  headers = api_headers + local_headers + project_headers + system_headers
+  header_lines = ['#include ' + h for h in headers]
+  lines = lines[:headers_begin] + header_lines + lines[headers_end + 1:]
+
+  f.seek(0)
+  f.truncate()
+  f.writelines(lines)
+
+def main():
+  parser = argparse.ArgumentParser(description=__doc__)
+  parser.add_argument('files', nargs='+', type=argparse.FileType('r+'),
+                      help='the source files to sort includes within')
+  args = parser.parse_args()
+  for f in args.files:
+    sort_includes(f)
+
+if __name__ == '__main__':
+  main()
diff --git a/utils/unittest/googletest/Makefile b/utils/unittest/googletest/Makefile
index 22c8f36fcc..bf736704f5 100644
--- a/utils/unittest/googletest/Makefile
+++ b/utils/unittest/googletest/Makefile
@@ -36,4 +36,6 @@ endif
 
 NO_INSTALL = 1
 
+SOURCES = $(filter-out gtest-all.cc, $(notdir $(wildcard $(PROJ_SRC_DIR)/*.cc)))
+
 include $(LEVEL)/Makefile.common
diff --git a/utils/unittest/googletest/README.LLVM b/utils/unittest/googletest/README.LLVM
index 51340e9ceb..3565a3280e 100644
--- a/utils/unittest/googletest/README.LLVM
+++ b/utils/unittest/googletest/README.LLVM
@@ -19,9 +19,10 @@ $ rmdir src
 $ mv *.h include/gtest/internal/
 
 # Update paths to the included files
+$ perl -pi -e 's|^#include "src/|#include "|' gtest-all.cc
 $ perl -pi -e 's|^#include "src/|#include "gtest/internal/|' *.cc
 
-$ rm -f gtest-all.cc gtest_main.cc
+$ rm -f gtest_main.cc
 
 $ mv COPYING LICENSE.TXT
 
diff --git a/utils/unittest/googletest/gtest-all.cc b/utils/unittest/googletest/gtest-all.cc
new file mode 100644
index 0000000000..97753e5b9d
--- /dev/null
+++ b/utils/unittest/googletest/gtest-all.cc
@@ -0,0 +1,48 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: mheule@google.com (Markus Heule)
+//
+// Google C++ Testing Framework (Google Test)
+//
+// Sometimes it's desirable to build Google Test by compiling a single file.
+// This file serves this purpose.
+
+// This line ensures that gtest.h can be compiled on its own, even
+// when it's fused.
+#include "gtest/gtest.h"
+
+// The following lines pull in the real gtest *.cc files.
+#include "gtest.cc"
+#include "gtest-death-test.cc"
+#include "gtest-filepath.cc"
+#include "gtest-port.cc"
+#include "gtest-printers.cc"
+#include "gtest-test-part.cc"
+#include "gtest-typed-test.cc"
diff --git a/utils/unittest/googletest/gtest-filepath.cc b/utils/unittest/googletest/gtest-filepath.cc
index bc610094e1..ad1bab8e9f 100644
--- a/utils/unittest/googletest/gtest-filepath.cc
+++ b/utils/unittest/googletest/gtest-filepath.cc
@@ -69,7 +69,6 @@ namespace internal {
 // of them.
 const char kPathSeparator = '\\';
 const char kAlternatePathSeparator = '/';
-const char kPathSeparatorString[] = "\\";
 const char kAlternatePathSeparatorString[] = "/";
 # if GTEST_OS_WINDOWS_MOBILE
 // Windows CE doesn't have a current directory. You should not use
@@ -83,7 +82,6 @@ const char kCurrentDirectoryString[] = ".\\";
 # endif  // GTEST_OS_WINDOWS_MOBILE
 #else
 const char kPathSeparator = '/';
-const char kPathSeparatorString[] = "/";
 const char kCurrentDirectoryString[] = "./";
 #endif  // GTEST_OS_WINDOWS
 
diff --git a/utils/unittest/googletest/include/gtest/internal/gtest-internal.h b/utils/unittest/googletest/include/gtest/internal/gtest-internal.h
index f8a5cc9447..a94bf28421 100644
--- a/utils/unittest/googletest/include/gtest/internal/gtest-internal.h
+++ b/utils/unittest/googletest/include/gtest/internal/gtest-internal.h
@@ -56,7 +56,9 @@
 #include "gtest/internal/gtest-filepath.h"
 #include "gtest/internal/gtest-type-util.h"
 
+#if !GTEST_NO_LLVM_RAW_OSTREAM
 #include "llvm/Support/raw_os_ostream.h"
+#endif
 
 // Due to C++ preprocessor weirdness, we need double indirection to
 // concatenate two tokens when one of them is __LINE__.  Writing
@@ -100,6 +102,7 @@
 // std::ostream with an implicit conversion to raw_ostream& and stream
 // to that.  This causes the compiler to prefer std::ostream overloads
 // but still find raw_ostream& overloads.
+#if !GTEST_NO_LLVM_RAW_OSTREAM
 namespace llvm {
 class convertible_fwd_ostream : public std::ostream {
   raw_os_ostream ros_;
@@ -115,6 +118,12 @@ inline void GTestStreamToHelper(std::ostream* os, const T& val) {
   llvm::convertible_fwd_ostream cos(*os);
   cos << val;
 }
+#else
+template <typename T>
+inline void GTestStreamToHelper(std::ostream* os, const T& val) {
+  *os << val;
+}
+#endif
 
 class ProtocolMessage;
 namespace proto2 { class Message; }
diff --git a/utils/vim/llvm.vim b/utils/vim/llvm.vim
index f80de83da8..753549fd72 100644
--- a/utils/vim/llvm.vim
+++ b/utils/vim/llvm.vim
@@ -14,50 +14,47 @@ syn case match
 " Types.
 " Types also include struct, array, vector, etc. but these don't
 " benefit as much from having dedicated highlighting rules.
-syn keyword llvmType void float double half
-syn keyword llvmType x86_fp80 fp128 ppc_fp128
+syn keyword llvmType void half float double x86_fp80 fp128 ppc_fp128
+syn keyword llvmType label metadata x86_mmx
 syn keyword llvmType type label opaque
 syn match   llvmType /\<i\d\+\>/
 
 " Instructions.
 " The true and false tokens can be used for comparison opcodes, but it's
 " much more common for these tokens to be used for boolean constants.
-syn keyword llvmStatement add fadd sub fsub mul fmul
-syn keyword llvmStatement sdiv udiv fdiv srem urem frem
-syn keyword llvmStatement and or xor
-syn keyword llvmStatement icmp fcmp
-syn keyword llvmStatement eq ne ugt uge ult ule sgt sge slt sle
-syn keyword llvmStatement oeq ogt oge olt ole one ord ueq ugt uge
-syn keyword llvmStatement ult ule une uno
-syn keyword llvmStatement nuw nsw exact inbounds
-syn keyword llvmStatement phi call select shl lshr ashr va_arg
-syn keyword llvmStatement trunc zext sext
-syn keyword llvmStatement fptrunc fpext fptoui fptosi uitofp sitofp
-syn keyword llvmStatement ptrtoint inttoptr bitcast
-syn keyword llvmStatement ret br indirectbr switch invoke unwind unreachable
-syn keyword llvmStatement malloc alloca free load store getelementptr
-syn keyword llvmStatement extractelement insertelement shufflevector
-syn keyword llvmStatement extractvalue insertvalue
+syn keyword llvmStatement add alloca and arcp ashr atomicrmw bitcast br call
+syn keyword llvmStatement cmpxchg eq exact extractelement extractvalue fadd fast
+syn keyword llvmStatement fcmp fdiv fence fmul fpext fptosi fptoui fptrunc free
+syn keyword llvmStatement frem fsub getelementptr icmp inbounds indirectbr
+syn keyword llvmStatement insertelement insertvalue inttoptr invoke landingpad
+syn keyword llvmStatement load lshr malloc max min mul nand ne ninf nnan nsw nsz
+syn keyword llvmStatement nuw oeq oge ogt ole olt one or ord phi ptrtoint resume
+syn keyword llvmStatement ret sdiv select sext sge sgt shl shufflevector sitofp
+syn keyword llvmStatement sle slt srem store sub switch trunc udiv ueq uge ugt
+syn keyword llvmStatement uitofp ule ult umax umin une uno unreachable unwind
+syn keyword llvmStatement urem va_arg xchg xor zext
 
 " Keywords.
-syn keyword llvmKeyword define declare global constant
-syn keyword llvmKeyword internal external private
-syn keyword llvmKeyword linkonce linkonce_odr weak weak_odr appending
-syn keyword llvmKeyword common extern_weak
-syn keyword llvmKeyword thread_local dllimport dllexport
-syn keyword llvmKeyword hidden protected default
-syn keyword llvmKeyword except deplibs
-syn keyword llvmKeyword volatile fastcc coldcc cc ccc
-syn keyword llvmKeyword x86_stdcallcc x86_fastcallcc
-syn keyword llvmKeyword ptx_kernel ptx_device
-syn keyword llvmKeyword signext zeroext inreg sret nounwind noreturn
-syn keyword llvmKeyword nocapture byval nest readnone readonly noalias uwtable
-syn keyword llvmKeyword inlinehint noinline alwaysinline optsize ssp sspreq
-syn keyword llvmKeyword noredzone noimplicitfloat naked alignstack
-syn keyword llvmKeyword module asm align tail to
-syn keyword llvmKeyword addrspace section alias sideeffect c gc
-syn keyword llvmKeyword target datalayout triple
-syn keyword llvmKeyword blockaddress
+syn keyword llvmKeyword acq_rel acquire address_safety addrspace alias align
+syn keyword llvmKeyword alignstack alwaysinline appending arm_aapcs_vfpcc
+syn keyword llvmKeyword arm_aapcscc arm_apcscc asm atomic available_externally
+syn keyword llvmKeyword blockaddress byval c catch cc ccc cleanup coldcc common
+syn keyword llvmKeyword constant datalayout declare default define deplibs
+syn keyword llvmKeyword dllexport dllimport except extern_weak external fastcc
+syn keyword llvmKeyword filter gc global hidden initialexec inlinehint inreg
+syn keyword llvmKeyword intel_ocl_bicc inteldialect internal linker_private
+syn keyword llvmKeyword linker_private_weak linker_private_weak_def_auto
+syn keyword llvmKeyword linkonce linkonce_odr linkonce_odr_auto_hide
+syn keyword llvmKeyword localdynamic localexec minsize module monotonic
+syn keyword llvmKeyword msp430_intrcc naked nest noalias nocapture
+syn keyword llvmKeyword noimplicitfloat noinline nonlazybind noredzone noreturn
+syn keyword llvmKeyword nounwind optsize personality private protected
+syn keyword llvmKeyword ptx_device ptx_kernel readnone readonly release
+syn keyword llvmKeyword returns_twice section seq_cst sideeffect signext
+syn keyword llvmKeyword singlethread spir_func spir_kernel sret ssp sspreq tail
+syn keyword llvmKeyword target thread_local to triple unnamed_addr unordered
+syn keyword llvmKeyword uwtable volatile weak weak_odr x86_fastcallcc
+syn keyword llvmKeyword x86_stdcallcc x86_thiscallcc zeroext
 
 " Obsolete keywords.
 syn keyword llvmError  getresult begin end
diff --git a/utils/yaml-bench/YAMLBench.cpp b/utils/yaml-bench/YAMLBench.cpp
index e5ee52a16d..eef4a725a1 100644
--- a/utils/yaml-bench/YAMLBench.cpp
+++ b/utils/yaml-bench/YAMLBench.cpp
@@ -17,11 +17,11 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/system_error.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/YAMLParser.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
 
 using namespace llvm;
 
diff --git a/utils/yaml2obj/yaml2obj.cpp b/utils/yaml2obj/yaml2obj.cpp
index 4fc620f4ea..c9436090dd 100644
--- a/utils/yaml2obj/yaml2obj.cpp
+++ b/utils/yaml2obj/yaml2obj.cpp
@@ -25,12 +25,11 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/system_error.h"
 #include "llvm/Support/YAMLParser.h"
-
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
 #include <vector>
 
 using namespace llvm;
author	Derek Schuff <dschuff@chromium.org>	2013-01-09 16:55:43 -0800
committer	Derek Schuff <dschuff@chromium.org>	2013-01-11 13:47:37 -0800
commit	b770d0e0636a4b5ad61b1ca661caee67576c05fc (patch)
tree	c486ce032d41f97313c50629bd5b879f53e6ccbf
parent	b835840cf112a6178506d834b58aa625f59a8994 (diff)
parent	1ad9253c9d34ccbce3e7e4ea5d87c266cbf93410 (diff)