175 files changed, 4726 insertions, 2607 deletions
diff --git a/Makefile.rules b/Makefile.rules
index 4edf401f8d..51accc512b 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -571,7 +571,11 @@ endif
 #--------------------------------------------------------------------
 
 ifeq ($(HOST_OS),Darwin)
+ ifdef MACOSX_DEPLOYMENT_TARGET
+  DARWIN_VERSION := $(MACOSX_DEPLOYMENT_TARGET)
+ else
   DARWIN_VERSION := `sw_vers -productVersion`
+ endif
   # Strip a number like 10.4.7 to 10.4
   DARWIN_VERSION := $(shell echo $(DARWIN_VERSION)| sed -E 's/(10.[0-9]).*/\1/')
   # Get "4" out of 10.4 for later pieces in the makefile.
diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst
index d3995e7036..bd26f7b150 100644
--- a/docs/BitCodeFormat.rst
+++ b/docs/BitCodeFormat.rst
@@ -489,6 +489,8 @@ The magic number for LLVM IR files is:
 When combined with the bitcode magic number and viewed as bytes, this is
 ``"BC 0xC0DE"``.
 
+.. _Signed VBRs:
+
 Signed VBRs
 ^^^^^^^^^^^
 
@@ -507,6 +509,7 @@ As such, signed VBR values of a specific width are emitted as follows:
 With this encoding, small positive and small negative values can both be emitted
 efficiently. Signed VBR encoding is used in ``CST_CODE_INTEGER`` and
 ``CST_CODE_WIDE_INTEGER`` records within ``CONSTANTS_BLOCK`` blocks.
+It is also used for phi instruction operands in `MODULE_CODE_VERSION`_ 1.
 
 LLVM IR Blocks
 ^^^^^^^^^^^^^^
@@ -553,13 +556,57 @@ block may contain the following sub-blocks:
 * `FUNCTION_BLOCK`_
 * `METADATA_BLOCK`_
 
+.. _MODULE_CODE_VERSION:
+
 MODULE_CODE_VERSION Record
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 ``[VERSION, version#]``
 
 The ``VERSION`` record (code 1) contains a single value indicating the format
-version. Only version 0 is supported at this time.
+version. Versions 0 and 1 are supported at this time. The difference between
+version 0 and 1 is in the encoding of instruction operands in
+each `FUNCTION_BLOCK`_.
+
+In version 0, each value defined by an instruction is assigned an ID
+unique to the function. Function-level value IDs are assigned starting from
+``NumModuleValues`` since they share the same namespace as module-level
+values. The value enumerator resets after each function. When a value is
+an operand of an instruction, the value ID is used to represent the operand.
+For large functions or large modules, these operand values can be large.
+
+The encoding in version 1 attempts to avoid large operand values
+in common cases. Instead of using the value ID directly, operands are
+encoded as relative to the current instruction. Thus, if an operand
+is the value defined by the previous instruction, the operand
+will be encoded as 1.
+
+For example, instead of
+
+.. code-block:: llvm
+
+  #n = load #n-1
+  #n+1 = icmp eq #n, #const0
+  br #n+1, label #(bb1), label #(bb2)
+
+version 1 will encode the instructions as
+
+.. code-block:: llvm
+
+  #n = load #1
+  #n+1 = icmp eq #1, (#n+1)-#const0
+  br #1, label #(bb1), label #(bb2)
+
+Note in the example that operands which are constants also use
+the relative encoding, while operands like basic block labels
+do not use the relative encoding.
+
+Forward references will result in a negative value.
+This can be inefficient, as operands are normally encoded
+as unsigned VBRs. However, forward references are rare, except in the
+case of phi instructions. For phi instructions, operands are encoded as
+`Signed VBRs`_ to deal with forward references.
+
 
 MODULE_CODE_TRIPLE Record
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst
index 391b43a3a2..e35e729556 100644
--- a/docs/DeveloperPolicy.rst
+++ b/docs/DeveloperPolicy.rst
@@ -138,8 +138,7 @@ favor for someone else.  Note that anyone is welcome to review and give feedback
 on a patch, but only people with Subversion write access can approve it.
 
 There is a web based code review tool that can optionally be used
-for code reviews. See the documentation on `Code Reviews with
-Phabricator <Phabricator.html>`_.
+for code reviews. See :doc:`Phabricator`.
 
 Code Owners
 -----------
diff --git a/docs/HowToSetUpLLVMStyleRTTI.rst b/docs/HowToSetUpLLVMStyleRTTI.rst
index a3403c2fc1..aa1ad84afe 100644
--- a/docs/HowToSetUpLLVMStyleRTTI.rst
+++ b/docs/HowToSetUpLLVMStyleRTTI.rst
@@ -77,8 +77,8 @@ steps:
        public:
       +  /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
       +  enum ShapeKind {
-      +    SquareKind,
-      +    CircleKind
+      +    SK_Square,
+      +    SK_Circle
       +  };
       +private:
       +  const ShapeKind Kind;
@@ -121,8 +121,8 @@ steps:
        public:
          /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
          enum ShapeKind {
-           SquareKind,
-           CircleKind
+           SK_Square,
+           SK_Circle
          };
        private:
          const ShapeKind Kind;
@@ -138,7 +138,7 @@ steps:
          double SideLength;
        public:
       -  Square(double S) : SideLength(S) {}
-      +  Square(double S) : Shape(SquareKind), SideLength(S) {}
+      +  Square(double S) : Shape(SK_Square), SideLength(S) {}
          double computeArea() /* override */;
        };
 
@@ -146,7 +146,7 @@ steps:
          double Radius;
        public:
       -  Circle(double R) : Radius(R) {}
-      +  Circle(double R) : Shape(CircleKind), Radius(R) {}
+      +  Circle(double R) : Shape(SK_Circle), Radius(R) {}
          double computeArea() /* override */;
        };
 
@@ -163,8 +163,8 @@ steps:
        public:
          /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.)
          enum ShapeKind {
-           SquareKind,
-           CircleKind
+           SK_Square,
+           SK_Circle
          };
        private:
          const ShapeKind Kind;
@@ -178,22 +178,22 @@ steps:
        class Square : public Shape {
          double SideLength;
        public:
-         Square(double S) : Shape(SquareKind), SideLength(S) {}
+         Square(double S) : Shape(SK_Square), SideLength(S) {}
          double computeArea() /* override */;
       +
       +  static bool classof(const Shape *S) {
-      +    return S->getKind() == SquareKind;
+      +    return S->getKind() == SK_Square;
       +  }
        };
 
        class Circle : public Shape {
          double Radius;
        public:
-         Circle(double R) : Shape(CircleKind), Radius(R) {}
+         Circle(double R) : Shape(SK_Circle), Radius(R) {}
          double computeArea() /* override */;
       +
       +  static bool classof(const Shape *S) {
-      +    return S->getKind() == CircleKind;
+      +    return S->getKind() == SK_Circle;
       +  }
        };
 
@@ -264,10 +264,10 @@ from ``Square``, and so ``ShapeKind`` becomes:
 .. code-block:: c++
 
     enum ShapeKind {
-      SquareKind,
-   +  SpecialSquareKind,
-   +  OtherSpecialSquareKind,
-      CircleKind
+      SK_Square,
+   +  SK_SpecialSquare,
+   +  SK_OtherSpecialSquare,
+      SK_Circle
     }
 
 Then in ``Square``, we would need to modify the ``classof`` like so:
@@ -275,11 +275,11 @@ Then in ``Square``, we would need to modify the ``classof`` like so:
 .. code-block:: c++
 
    -  static bool classof(const Shape *S) {
-   -    return S->getKind() == SquareKind;
+   -    return S->getKind() == SK_Square;
    -  }
    +  static bool classof(const Shape *S) {
-   +    return S->getKind() >= SquareKind &&
-   +           S->getKind() <= OtherSpecialSquareKind;
+   +    return S->getKind() >= SK_Square &&
+   +           S->getKind() <= SK_OtherSpecialSquare;
    +  }
 
 The reason that we need to test a range like this instead of just equality
diff --git a/docs/Lexicon.rst b/docs/Lexicon.rst
index 6ebe61429f..d568c0b302 100644
--- a/docs/Lexicon.rst
+++ b/docs/Lexicon.rst
@@ -20,8 +20,10 @@ A
 B
 -
 
-**BURS**
+**BB Vectorization**
+    Basic Block Vectorization
 
+**BURS**
     Bottom Up Rewriting System --- A method of instruction selection for code
     generation.  An example is the `BURG
     <http://www.program-transformation.org/Transform/BURG>`_ tool.
@@ -156,7 +158,7 @@ R
     In garbage collection, a pointer variable lying outside of the `heap`_ from
     which the collector begins its reachability analysis. In the context of code
     generation, "root" almost always refers to a "stack root" --- a local or
-    temporary variable within an executing function.</dd>
+    temporary variable within an executing function.
 
 **RPO**
     Reverse postorder
@@ -192,3 +194,10 @@ S
 **Stack Map**
     In garbage collection, metadata emitted by the code generator which
     identifies `roots`_ within the stack frame of an executing function.
+
+T
+-
+
+**TBAA**
+    Type-Based Alias Analysis
+
diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst
index 773c2f07ff..cd984b09be 100644
--- a/docs/Phabricator.rst
+++ b/docs/Phabricator.rst
@@ -1,10 +1,3 @@
-.. _Phabricator:
-.. _LLVM's Phabricator: http://llvm-reviews.chandlerc.com
-.. _Code Repository Browser: http://llvm-reviews.chandlerc.com/diffusion/
-.. _Arcanist Quick Start: http://www.phabricator.com/docs/phabricator/article/Arcanist_Quick_Start.html
-.. _Arcanist User Guide: http://www.phabricator.com/docs/phabricator/article/Arcanist_User_Guide.html
-
-
 =============================
 Code Reviews with Phabricator
 =============================
@@ -94,3 +87,8 @@ Status
 
 Currently, we're testing Phabricator for use with Clang/LLVM. Please let us
 know whether you like it and what could be improved!
+
+.. _LLVM's Phabricator: http://llvm-reviews.chandlerc.com
+.. _Code Repository Browser: http://llvm-reviews.chandlerc.com/diffusion/
+.. _Arcanist Quick Start: http://www.phabricator.com/docs/phabricator/article/Arcanist_Quick_Start.html
+.. _Arcanist User Guide: http://www.phabricator.com/docs/phabricator/article/Arcanist_User_Guide.html
diff --git a/docs/SphinxQuickstartTemplate.rst b/docs/SphinxQuickstartTemplate.rst
new file mode 100644
index 0000000000..75d916368e
--- /dev/null
+++ b/docs/SphinxQuickstartTemplate.rst
@@ -0,0 +1,125 @@
+==========================
+Sphinx Quickstart Template
+==========================
+
+.. sectionauthor:: Sean Silva <silvas@purdue.edu>
+
+Introduction and Quickstart
+===========================
+
+This document is meant to get you writing documentation as fast as possible
+even if you have no previous experience with Sphinx. The goal is to take
+someone in the state of "I want to write documentation and get it added to
+LLVM's docs" and turn that into useful documentation mailed to llvm-commits
+with as little nonsense as possible.
+
+You can find this document in ``docs/SphinxQuickstartTemplate.rst``. You
+should copy it, open the new file in your text editor, write your docs, and
+then send the new document to llvm-commits for review.
+
+Focus on *content*. It is easy to fix the Sphinx (reStructuredText) syntax
+later if necessary, although reStructuredText tries to imitate common
+plain-text conventions so it should be quite natural. A basic knowledge of
+reStructuredText syntax is useful when writing the document, so the last
+~half of this document (starting with `Example Section`_) gives examples
+which should cover 99% of use cases.
+
+Let me say that again: focus on *content*.
+
+Once you have finished with the content, please send the ``.rst`` file to
+llvm-commits for review.
+
+Guidelines
+==========
+
+Try to answer the following questions in your first section:
+
+#. Why would I want to read this document?
+
+#. What should I know to be able to follow along with this document?
+
+#. What will I have learned by the end of this document?
+
+Common names for the first section are ``Introduction``, ``Overview``, or
+``Background``.
+
+If possible, make your document a "how to". Give it a name ``HowTo*.rst``
+like the other "how to" documents. This format is usually the easiest
+for another person to understand and also the most useful.
+
+You generally should not be writing documentation other than a "how to"
+unless there is already a "how to" about your topic. The reason for this
+is that without a "how to" document to read first, it is difficult for a
+person to understand a more advanced document.
+
+Focus on content (yes, I had to say it again).
+
+The rest of this document shows example reStructuredText markup constructs
+that are meant to be read by you in your text editor after you have copied
+this file into a new file for the documentation you are about to write.
+
+Example Section
+===============
+
+Your text can be *emphasized*, **bold**, or ``monospace``.
+
+Use blank lines to separate paragraphs.
+
+Headings (like ``Example Section`` just above) give your document
+structure. Use the same kind of adornments (e.g. ``======`` vs. ``------``)
+as are used in this document. The adornment must be the same length as the
+text above it. For Vim users, variations of ``yypVr=`` might be handy.
+
+Example Subsection
+------------------
+
+Make a link `like this <http://llvm.org/>`_. There is also a more
+sophisticated syntax which `can be more readable`_ for longer links since
+it disrupts the flow less. You can put the ``.. _`link text`: <URL>`` block
+pretty much anywhere later in the document.
+
+.. _`can be more readable`: http://en.wikipedia.org/wiki/LLVM
+
+Lists can be made like this:
+
+#. A list starting with ``#.`` will be automatically numbered.
+
+#. This is a second list element.
+
+   #. They nest too.
+
+You can also use unordered lists.
+
+* Stuff.
+
+  + Deeper stuff.
+
+* More stuff.
+
+Example Subsubsection
+^^^^^^^^^^^^^^^^^^^^^
+
+You can make blocks of code like this:
+
+.. code-block:: c++
+
+   int main() {
+     return 0
+   }
+
+For a shell session, use a ``bash`` code block:
+
+.. code-block:: bash
+
+   $ echo "Goodbye cruel world!"
+   $ rm -rf /
+
+If you need to show LLVM IR use the ``llvm`` code block.
+
+Hopefully you won't need to be this deep
+""""""""""""""""""""""""""""""""""""""""
+
+If you need to do fancier things than what has been shown in this document,
+you can mail the list or check Sphinx's `reStructuredText Primer`_.
+
+.. _`reStructuredText Primer`: http://sphinx.pocoo.org/rest.html
diff --git a/docs/userguides.rst b/docs/userguides.rst
index 6ff46ade48..8c1554dfce 100644
--- a/docs/userguides.rst
+++ b/docs/userguides.rst
@@ -18,6 +18,8 @@ User Guides
    HowToAddABuilder
    yaml2obj
    HowToSubmitABug
+   SphinxQuickstartTemplate
+   Phabricator
 
 * :ref:`getting_started`
     
@@ -70,6 +72,10 @@ User Guides
     
    Instructions for properly submitting information about any bugs you run into
    in the LLVM system.
+* :doc:`SphinxQuickstartTemplate`
+
+  A template + tutorial for writing new Sphinx documentation. It is meant
+  to be read in source form.
     
 * `LLVM Testing Infrastructure Guide <TestingGuide.html>`_
 
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index 56d4d81b5c..215cb4d371 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -10,13 +10,13 @@
 // Demo program which implements an example LLVM exception implementation, and
 // shows several test cases including the handling of foreign exceptions.
 // It is run with type info types arguments to throw. A test will
-// be run for each given type info type. While type info types with the value 
+// be run for each given type info type. While type info types with the value
 // of -1 will trigger a foreign C++ exception to be thrown; type info types
-// <= 6 and >= 1 will cause the associated generated exceptions to be thrown 
+// <= 6 and >= 1 will cause the associated generated exceptions to be thrown
 // and caught by generated test functions; and type info types > 6
 // will result in exceptions which pass through to the test harness. All other
 // type info types are not supported and could cause a crash. In all cases,
-// the "finally" blocks of every generated test functions will executed 
+// the "finally" blocks of every generated test functions will executed
 // regardless of whether or not that test function ignores or catches the
 // thrown exception.
 //
@@ -25,25 +25,25 @@
 // ExceptionDemo
 //
 //     causes a usage to be printed to stderr
-// 
+//
 // ExceptionDemo 2 3 7 -1
 //
 //     results in the following cases:
-//         - Value 2 causes an exception with a type info type of 2 to be 
+//         - Value 2 causes an exception with a type info type of 2 to be
 //           thrown and caught by an inner generated test function.
-//         - Value 3 causes an exception with a type info type of 3 to be 
+//         - Value 3 causes an exception with a type info type of 3 to be
 //           thrown and caught by an outer generated test function.
-//         - Value 7 causes an exception with a type info type of 7 to be 
+//         - Value 7 causes an exception with a type info type of 7 to be
 //           thrown and NOT be caught by any generated function.
 //         - Value -1 causes a foreign C++ exception to be thrown and not be
 //           caught by any generated function
 //
 //     Cases -1 and 7 are caught by a C++ test harness where the validity of
-//         of a C++ catch(...) clause catching a generated exception with a 
-//         type info type of 7 is explained by: example in rules 1.6.4 in 
+//         of a C++ catch(...) clause catching a generated exception with a
+//         type info type of 7 is explained by: example in rules 1.6.4 in
 //         http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22)
 //
-// This code uses code from the llvm compiler-rt project and the llvm 
+// This code uses code from the llvm compiler-rt project and the llvm
 // Kaleidoscope project.
 //
 //===----------------------------------------------------------------------===//
@@ -63,12 +63,12 @@
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/TargetSelect.h"
 
-// FIXME: Although all systems tested with (Linux, OS X), do not need this 
-//        header file included. A user on ubuntu reported, undefined symbols 
+// FIXME: Although all systems tested with (Linux, OS X), do not need this
+//        header file included. A user on ubuntu reported, undefined symbols
 //        for stderr, and fprintf, and the addition of this include fixed the
-//        issue for them. Given that LLVM's best practices include the goal 
-//        of reducing the number of redundant header files included, the 
-//        correct solution would be to find out why these symbols are not 
+//        issue for them. Given that LLVM's best practices include the goal
+//        of reducing the number of redundant header files included, the
+//        correct solution would be to find out why these symbols are not
 //        defined for the system in question, and fix the issue by finding out
 //        which LLVM header file, if any, would include these symbols.
 #include <cstdio>
@@ -81,11 +81,11 @@
 #define USE_GLOBAL_STR_CONSTS true
 #endif
 
-// System C++ ABI unwind types from: 
+// System C++ ABI unwind types from:
 //     http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22)
 
 extern "C" {
-  
+
   typedef enum {
     _URC_NO_REASON = 0,
     _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
@@ -97,7 +97,7 @@ extern "C" {
     _URC_INSTALL_CONTEXT = 7,
     _URC_CONTINUE_UNWIND = 8
   } _Unwind_Reason_Code;
-  
+
   typedef enum {
     _UA_SEARCH_PHASE = 1,
     _UA_CLEANUP_PHASE = 2,
@@ -105,34 +105,34 @@ extern "C" {
     _UA_FORCE_UNWIND = 8,
     _UA_END_OF_STACK = 16
   } _Unwind_Action;
-  
+
   struct _Unwind_Exception;
-  
+
   typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code,
                                                 struct _Unwind_Exception *);
-  
+
   struct _Unwind_Exception {
     uint64_t exception_class;
     _Unwind_Exception_Cleanup_Fn exception_cleanup;
-    
-    uintptr_t private_1;    
-    uintptr_t private_2;    
-    
+
+    uintptr_t private_1;
+    uintptr_t private_2;
+
     // @@@ The IA-64 ABI says that this structure must be double-word aligned.
-    //  Taking that literally does not make much sense generically.  Instead 
+    //  Taking that literally does not make much sense generically.  Instead
     //  we provide the maximum alignment required by any type for the machine.
   } __attribute__((__aligned__));
-  
+
   struct _Unwind_Context;
   typedef struct _Unwind_Context *_Unwind_Context_t;
-  
+
   extern const uint8_t *_Unwind_GetLanguageSpecificData (_Unwind_Context_t c);
   extern uintptr_t _Unwind_GetGR (_Unwind_Context_t c, int i);
   extern void _Unwind_SetGR (_Unwind_Context_t c, int i, uintptr_t n);
   extern void _Unwind_SetIP (_Unwind_Context_t, uintptr_t new_value);
   extern uintptr_t _Unwind_GetIP (_Unwind_Context_t context);
   extern uintptr_t _Unwind_GetRegionStart (_Unwind_Context_t context);
-  
+
 } // extern "C"
 
 //
@@ -148,13 +148,13 @@ struct OurExceptionType_t {
 
 /// This is our Exception class which relies on a negative offset to calculate
 /// pointers to its instances from pointers to its unwindException member.
-/// 
+///
 /// Note: The above unwind.h defines struct _Unwind_Exception to be aligned
 ///       on a double word boundary. This is necessary to match the standard:
 ///       http://refspecs.freestandards.org/abi-eh-1.21.html
 struct OurBaseException_t {
   struct OurExceptionType_t type;
-  
+
   // Note: This is properly aligned in unwind.h
   struct _Unwind_Exception unwindException;
 };
@@ -165,7 +165,7 @@ typedef struct OurBaseException_t OurException;
 typedef struct _Unwind_Exception OurUnwindException;
 
 //
-// Various globals used to support typeinfo and generatted exceptions in 
+// Various globals used to support typeinfo and generatted exceptions in
 // general
 //
 
@@ -173,7 +173,7 @@ static std::map<std::string, llvm::Value*> namedValues;
 
 int64_t ourBaseFromUnwindOffset;
 
-const unsigned char ourBaseExcpClassChars[] = 
+const unsigned char ourBaseExcpClassChars[] =
 {'o', 'b', 'j', '\0', 'b', 'a', 's', '\0'};
 
 
@@ -203,7 +203,7 @@ typedef std::vector<llvm::Type*> ArgTypes;
 /// @param retType function return type
 /// @param theArgTypes function's ordered argument types
 /// @param theArgNames function's ordered arguments needed if use of this
-///        function corresponds to a function definition. Use empty 
+///        function corresponds to a function definition. Use empty
 ///        aggregate for function declarations.
 /// @param functName function name
 /// @param linkage function linkage
@@ -224,17 +224,17 @@ llvm::Function *createFunction(llvm::Module &module,
     llvm::Function::Create(functType, linkage, functName, &module);
   if (!ret || declarationOnly)
     return(ret);
-  
+
   namedValues.clear();
-  unsigned i = 0; 
+  unsigned i = 0;
   for (llvm::Function::arg_iterator argIndex = ret->arg_begin();
        i != theArgNames.size();
        ++argIndex, ++i) {
-    
+
     argIndex->setName(theArgNames[i]);
     namedValues[theArgNames[i]] = argIndex;
   }
-  
+
   return(ret);
 }
 
@@ -250,13 +250,13 @@ static llvm::AllocaInst *createEntryBlockAlloca(llvm::Function &function,
                                                 const std::string &varName,
                                                 llvm::Type *type,
                                                 llvm::Constant *initWith = 0) {
-  llvm::BasicBlock &block = function.getEntryBlock(); 
+  llvm::BasicBlock &block = function.getEntryBlock();
   llvm::IRBuilder<> tmp(&block, block.begin());
   llvm::AllocaInst *ret = tmp.CreateAlloca(type, 0, varName.c_str());
-  
-  if (initWith) 
+
+  if (initWith)
     tmp.CreateStore(initWith, ret);
-  
+
   return(ret);
 }
 
@@ -266,7 +266,7 @@ static llvm::AllocaInst *createEntryBlockAlloca(llvm::Function &function,
 //
 
 //
-// Runtime C Library functions 
+// Runtime C Library functions
 //
 
 // Note: using an extern "C" block so that static functions can be used
@@ -275,7 +275,7 @@ extern "C" {
 // Note: Better ways to decide on bit width
 //
 /// Prints a 32 bit number, according to the format, to stderr.
-/// @param intToPrint integer to print 
+/// @param intToPrint integer to print
 /// @param format printf like format to use when printing
 void print32Int(int intToPrint, const char *format) {
   if (format) {
@@ -292,7 +292,7 @@ void print32Int(int intToPrint, const char *format) {
 // Note: Better ways to decide on bit width
 //
 /// Prints a 64 bit number, according to the format, to stderr.
-/// @param intToPrint integer to print 
+/// @param intToPrint integer to print
 /// @param format printf like format to use when printing
 void print64Int(long int intToPrint, const char *format) {
   if (format) {
@@ -327,19 +327,19 @@ void deleteOurException(OurUnwindException *expToDelete) {
   fprintf(stderr,
           "deleteOurException(...).\n");
 #endif
-  
+
   if (expToDelete &&
       (expToDelete->exception_class == ourBaseExceptionClass)) {
-    
+
     free(((char*) expToDelete) + ourBaseFromUnwindOffset);
   }
 }
 
 
-/// This function is the struct _Unwind_Exception API mandated delete function 
-/// used by foreign exception handlers when deleting our exception 
+/// This function is the struct _Unwind_Exception API mandated delete function
+/// used by foreign exception handlers when deleting our exception
 /// (OurException), instances.
-/// @param reason @link http://refspecs.freestandards.org/abi-eh-1.21.html 
+/// @param reason @link http://refspecs.freestandards.org/abi-eh-1.21.html
 /// @unlink
 /// @param expToDelete exception instance to delete
 void deleteFromUnwindOurException(_Unwind_Reason_Code reason,
@@ -348,7 +348,7 @@ void deleteFromUnwindOurException(_Unwind_Reason_Code reason,
   fprintf(stderr,
           "deleteFromUnwindOurException(...).\n");
 #endif
-  
+
   deleteOurException(expToDelete);
 }
 
@@ -362,13 +362,13 @@ OurUnwindException *createOurException(int type) {
   (ret->type).type = type;
   (ret->unwindException).exception_class = ourBaseExceptionClass;
   (ret->unwindException).exception_cleanup = deleteFromUnwindOurException;
-  
+
   return(&(ret->unwindException));
 }
 
 
-/// Read a uleb128 encoded value and advance pointer 
-/// See Variable Length Data in: 
+/// Read a uleb128 encoded value and advance pointer
+/// See Variable Length Data in:
 /// @link http://dwarfstd.org/Dwarf3.pdf @unlink
 /// @param data reference variable holding memory pointer to decode from
 /// @returns decoded value
@@ -377,22 +377,22 @@ static uintptr_t readULEB128(const uint8_t **data) {
   uintptr_t shift = 0;
   unsigned char byte;
   const uint8_t *p = *data;
-  
+
   do {
     byte = *p++;
     result |= (byte & 0x7f) << shift;
     shift += 7;
-  } 
+  }
   while (byte & 0x80);
-  
+
   *data = p;
-  
+
   return result;
 }
 
 
-/// Read a sleb128 encoded value and advance pointer 
-/// See Variable Length Data in: 
+/// Read a sleb128 encoded value and advance pointer
+/// See Variable Length Data in:
 /// @link http://dwarfstd.org/Dwarf3.pdf @unlink
 /// @param data reference variable holding memory pointer to decode from
 /// @returns decoded value
@@ -401,26 +401,26 @@ static uintptr_t readSLEB128(const uint8_t **data) {
   uintptr_t shift = 0;
   unsigned char byte;
   const uint8_t *p = *data;
-  
+
   do {
     byte = *p++;
     result |= (byte & 0x7f) << shift;
     shift += 7;
-  } 
+  }
   while (byte & 0x80);
-  
+
   *data = p;
-  
+
   if ((byte & 0x40) && (shift < (sizeof(result) << 3))) {
     result |= (~0 << shift);
   }
-  
+
   return result;
 }
 
 
-/// Read a pointer encoded value and advance pointer 
-/// See Variable Length Data in: 
+/// Read a pointer encoded value and advance pointer
+/// See Variable Length Data in:
 /// @link http://dwarfstd.org/Dwarf3.pdf @unlink
 /// @param data reference variable holding memory pointer to decode from
 /// @param encoding dwarf encoding type
@@ -428,11 +428,11 @@ static uintptr_t readSLEB128(const uint8_t **data) {
 static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) {
   uintptr_t result = 0;
   const uint8_t *p = *data;
-  
-  if (encoding == llvm::dwarf::DW_EH_PE_omit) 
+
+  if (encoding == llvm::dwarf::DW_EH_PE_omit)
     return(result);
-  
-  // first get value 
+
+  // first get value
   switch (encoding & 0x0F) {
     case llvm::dwarf::DW_EH_PE_absptr:
       result = *((uintptr_t*)p);
@@ -470,15 +470,15 @@ static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) {
       p += sizeof(int64_t);
       break;
     default:
-      // not supported 
+      // not supported
       abort();
       break;
   }
-  
-  // then add relative offset 
+
+  // then add relative offset
   switch (encoding & 0x70) {
     case llvm::dwarf::DW_EH_PE_absptr:
-      // do nothing 
+      // do nothing
       break;
     case llvm::dwarf::DW_EH_PE_pcrel:
       result += (uintptr_t)(*data);
@@ -488,34 +488,34 @@ static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) {
     case llvm::dwarf::DW_EH_PE_funcrel:
     case llvm::dwarf::DW_EH_PE_aligned:
     default:
-      // not supported 
+      // not supported
       abort();
       break;
   }
-  
-  // then apply indirection 
+
+  // then apply indirection
   if (encoding & llvm::dwarf::DW_EH_PE_indirect) {
     result = *((uintptr_t*)result);
   }
-  
+
   *data = p;
-  
+
   return result;
 }
 
 
-/// Deals with Dwarf actions matching our type infos 
-/// (OurExceptionType_t instances). Returns whether or not a dwarf emitted 
-/// action matches the supplied exception type. If such a match succeeds, 
-/// the resultAction argument will be set with > 0 index value. Only 
-/// corresponding llvm.eh.selector type info arguments, cleanup arguments 
+/// Deals with Dwarf actions matching our type infos
+/// (OurExceptionType_t instances). Returns whether or not a dwarf emitted
+/// action matches the supplied exception type. If such a match succeeds,
+/// the resultAction argument will be set with > 0 index value. Only
+/// corresponding llvm.eh.selector type info arguments, cleanup arguments
 /// are supported. Filters are not supported.
-/// See Variable Length Data in: 
+/// See Variable Length Data in:
 /// @link http://dwarfstd.org/Dwarf3.pdf @unlink
 /// Also see @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink
 /// @param resultAction reference variable which will be set with result
 /// @param classInfo our array of type info pointers (to globals)
-/// @param actionEntry index into above type info array or 0 (clean up). 
+/// @param actionEntry index into above type info array or 0 (clean up).
 ///        We do not support filters.
 /// @param exceptionClass exception class (_Unwind_Exception::exception_class)
 ///        of thrown exception.
@@ -523,22 +523,22 @@ static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) {
 /// @returns whether or not a type info was found. False is returned if only
 ///          a cleanup was found
 static bool handleActionValue(int64_t *resultAction,
-                              struct OurExceptionType_t **classInfo, 
-                              uintptr_t actionEntry, 
-                              uint64_t exceptionClass, 
+                              struct OurExceptionType_t **classInfo,
+                              uintptr_t actionEntry,
+                              uint64_t exceptionClass,
                               struct _Unwind_Exception *exceptionObject) {
   bool ret = false;
-  
-  if (!resultAction || 
-      !exceptionObject || 
+
+  if (!resultAction ||
+      !exceptionObject ||
       (exceptionClass != ourBaseExceptionClass))
     return(ret);
-  
+
   struct OurBaseException_t *excp = (struct OurBaseException_t*)
   (((char*) exceptionObject) + ourBaseFromUnwindOffset);
   struct OurExceptionType_t *excpType = &(excp->type);
   int type = excpType->type;
-  
+
 #ifdef DEBUG
   fprintf(stderr,
           "handleActionValue(...): exceptionObject = <%p>, "
@@ -546,12 +546,12 @@ static bool handleActionValue(int64_t *resultAction,
           exceptionObject,
           excp);
 #endif
-  
+
   const uint8_t *actionPos = (uint8_t*) actionEntry,
   *tempActionPos;
   int64_t typeOffset = 0,
   actionOffset;
-  
+
   for (int i = 0; true; ++i) {
     // Each emitted dwarf action corresponds to a 2 tuple of
     // type info address offset, and action offset to the next
@@ -559,7 +559,7 @@ static bool handleActionValue(int64_t *resultAction,
     typeOffset = readSLEB128(&actionPos);
     tempActionPos = actionPos;
     actionOffset = readSLEB128(&tempActionPos);
-    
+
 #ifdef DEBUG
     fprintf(stderr,
             "handleActionValue(...):typeOffset: <%lld>, "
@@ -567,9 +567,9 @@ static bool handleActionValue(int64_t *resultAction,
             typeOffset,
             actionOffset);
 #endif
-    assert((typeOffset >= 0) && 
+    assert((typeOffset >= 0) &&
            "handleActionValue(...):filters are not supported.");
-    
+
     // Note: A typeOffset == 0 implies that a cleanup llvm.eh.selector
     //       argument has been matched.
     if ((typeOffset > 0) &&
@@ -583,17 +583,17 @@ static bool handleActionValue(int64_t *resultAction,
       ret = true;
       break;
     }
-    
+
 #ifdef DEBUG
     fprintf(stderr,
             "handleActionValue(...):actionValue not found.\n");
 #endif
     if (!actionOffset)
       break;
-    
+
     actionPos += actionOffset;
   }
-  
+
   return(ret);
 }
 
@@ -602,52 +602,52 @@ static bool handleActionValue(int64_t *resultAction,
 /// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink
 /// @param version unsupported (ignored), unwind version
 /// @param lsda language specific data area
-/// @param _Unwind_Action actions minimally supported unwind stage 
+/// @param _Unwind_Action actions minimally supported unwind stage
 ///        (forced specifically not supported)
 /// @param exceptionClass exception class (_Unwind_Exception::exception_class)
 ///        of thrown exception.
 /// @param exceptionObject thrown _Unwind_Exception instance.
 /// @param context unwind system context
-/// @returns minimally supported unwinding control indicator 
-static _Unwind_Reason_Code handleLsda(int version, 
+/// @returns minimally supported unwinding control indicator
+static _Unwind_Reason_Code handleLsda(int version,
                                       const uint8_t *lsda,
                                       _Unwind_Action actions,
-                                      uint64_t exceptionClass, 
+                                      uint64_t exceptionClass,
                                     struct _Unwind_Exception *exceptionObject,
                                       _Unwind_Context_t context) {
   _Unwind_Reason_Code ret = _URC_CONTINUE_UNWIND;
-  
+
   if (!lsda)
     return(ret);
-  
+
 #ifdef DEBUG
-  fprintf(stderr, 
+  fprintf(stderr,
           "handleLsda(...):lsda is non-zero.\n");
 #endif
-  
+
   // Get the current instruction pointer and offset it before next
   // instruction in the current frame which threw the exception.
   uintptr_t pc = _Unwind_GetIP(context)-1;
-  
-  // Get beginning current frame's code (as defined by the 
+
+  // Get beginning current frame's code (as defined by the
   // emitted dwarf code)
   uintptr_t funcStart = _Unwind_GetRegionStart(context);
   uintptr_t pcOffset = pc - funcStart;
   struct OurExceptionType_t **classInfo = NULL;
-  
+
   // Note: See JITDwarfEmitter::EmitExceptionTable(...) for corresponding
   //       dwarf emission
-  
+
   // Parse LSDA header.
   uint8_t lpStartEncoding = *lsda++;
-  
+
   if (lpStartEncoding != llvm::dwarf::DW_EH_PE_omit) {
-    readEncodedPointer(&lsda, lpStartEncoding); 
+    readEncodedPointer(&lsda, lpStartEncoding);
   }
-  
+
   uint8_t ttypeEncoding = *lsda++;
   uintptr_t classInfoOffset;
-  
+
   if (ttypeEncoding != llvm::dwarf::DW_EH_PE_omit) {
     // Calculate type info locations in emitted dwarf code which
     // were flagged by type info arguments to llvm.eh.selector
@@ -655,47 +655,47 @@ static _Unwind_Reason_Code handleLsda(int version,
     classInfoOffset = readULEB128(&lsda);
     classInfo = (struct OurExceptionType_t**) (lsda + classInfoOffset);
   }
-  
-  // Walk call-site table looking for range that 
-  // includes current PC. 
-  
+
+  // Walk call-site table looking for range that
+  // includes current PC.
+
   uint8_t         callSiteEncoding = *lsda++;
   uint32_t        callSiteTableLength = readULEB128(&lsda);
   const uint8_t   *callSiteTableStart = lsda;
-  const uint8_t   *callSiteTableEnd = callSiteTableStart + 
+  const uint8_t   *callSiteTableEnd = callSiteTableStart +
   callSiteTableLength;
   const uint8_t   *actionTableStart = callSiteTableEnd;
   const uint8_t   *callSitePtr = callSiteTableStart;
-  
+
   bool foreignException = false;
-  
+
   while (callSitePtr < callSiteTableEnd) {
-    uintptr_t start = readEncodedPointer(&callSitePtr, 
+    uintptr_t start = readEncodedPointer(&callSitePtr,
                                          callSiteEncoding);
-    uintptr_t length = readEncodedPointer(&callSitePtr, 
+    uintptr_t length = readEncodedPointer(&callSitePtr,
                                           callSiteEncoding);
-    uintptr_t landingPad = readEncodedPointer(&callSitePtr, 
+    uintptr_t landingPad = readEncodedPointer(&callSitePtr,
                                               callSiteEncoding);
-    
+
     // Note: Action value
     uintptr_t actionEntry = readULEB128(&callSitePtr);
-    
+
     if (exceptionClass != ourBaseExceptionClass) {
       // We have been notified of a foreign exception being thrown,
       // and we therefore need to execute cleanup landing pads
       actionEntry = 0;
       foreignException = true;
     }
-    
+
     if (landingPad == 0) {
 #ifdef DEBUG
       fprintf(stderr,
               "handleLsda(...): No landing pad found.\n");
 #endif
-      
+
       continue; // no landing pad for this entry
     }
-    
+
     if (actionEntry) {
       actionEntry += ((uintptr_t) actionTableStart) - 1;
     }
@@ -705,55 +705,55 @@ static _Unwind_Reason_Code handleLsda(int version,
               "handleLsda(...):No action table found.\n");
 #endif
     }
-    
+
     bool exceptionMatched = false;
-    
+
     if ((start <= pcOffset) && (pcOffset < (start + length))) {
 #ifdef DEBUG
       fprintf(stderr,
               "handleLsda(...): Landing pad found.\n");
 #endif
       int64_t actionValue = 0;
-      
+
       if (actionEntry) {
         exceptionMatched = handleActionValue(&actionValue,
-                                             classInfo, 
-                                             actionEntry, 
-                                             exceptionClass, 
+                                             classInfo,
+                                             actionEntry,
+                                             exceptionClass,
                                              exceptionObject);
       }
-      
+
       if (!(actions & _UA_SEARCH_PHASE)) {
 #ifdef DEBUG
         fprintf(stderr,
                 "handleLsda(...): installed landing pad "
                 "context.\n");
 #endif
-        
+
         // Found landing pad for the PC.
-        // Set Instruction Pointer to so we re-enter function 
-        // at landing pad. The landing pad is created by the 
+        // Set Instruction Pointer to so we re-enter function
+        // at landing pad. The landing pad is created by the
         // compiler to take two parameters in registers.
-        _Unwind_SetGR(context, 
-                      __builtin_eh_return_data_regno(0), 
+        _Unwind_SetGR(context,
+                      __builtin_eh_return_data_regno(0),
                       (uintptr_t)exceptionObject);
-        
+
         // Note: this virtual register directly corresponds
         //       to the return of the llvm.eh.selector intrinsic
         if (!actionEntry || !exceptionMatched) {
           // We indicate cleanup only
-          _Unwind_SetGR(context, 
-                        __builtin_eh_return_data_regno(1), 
+          _Unwind_SetGR(context,
+                        __builtin_eh_return_data_regno(1),
                         0);
         }
         else {
           // Matched type info index of llvm.eh.selector intrinsic
           // passed here.
-          _Unwind_SetGR(context, 
-                        __builtin_eh_return_data_regno(1), 
+          _Unwind_SetGR(context,
+                        __builtin_eh_return_data_regno(1),
                         actionValue);
         }
-        
+
         // To execute landing pad set here
         _Unwind_SetIP(context, funcStart + landingPad);
         ret = _URC_INSTALL_CONTEXT;
@@ -767,19 +767,19 @@ static _Unwind_Reason_Code handleLsda(int version,
       }
       else {
         // Note: Only non-clean up handlers are marked as
-        //       found. Otherwise the clean up handlers will be 
-        //       re-found and executed during the clean up 
+        //       found. Otherwise the clean up handlers will be
+        //       re-found and executed during the clean up
         //       phase.
 #ifdef DEBUG
         fprintf(stderr,
                 "handleLsda(...): cleanup handler found.\n");
 #endif
       }
-      
+
       break;
     }
   }
-  
+
   return(ret);
 }
 
@@ -788,23 +788,23 @@ static _Unwind_Reason_Code handleLsda(int version,
 /// dwarf unwind info block. Again see: JITDwarfEmitter.cpp.
 /// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink
 /// @param version unsupported (ignored), unwind version
-/// @param _Unwind_Action actions minimally supported unwind stage 
+/// @param _Unwind_Action actions minimally supported unwind stage
 ///        (forced specifically not supported)
 /// @param exceptionClass exception class (_Unwind_Exception::exception_class)
 ///        of thrown exception.
 /// @param exceptionObject thrown _Unwind_Exception instance.
 /// @param context unwind system context
-/// @returns minimally supported unwinding control indicator 
-_Unwind_Reason_Code ourPersonality(int version, 
+/// @returns minimally supported unwinding control indicator
+_Unwind_Reason_Code ourPersonality(int version,
                                    _Unwind_Action actions,
-                                   uint64_t exceptionClass, 
+                                   uint64_t exceptionClass,
                                    struct _Unwind_Exception *exceptionObject,
                                    _Unwind_Context_t context) {
 #ifdef DEBUG
-  fprintf(stderr, 
+  fprintf(stderr,
           "We are in ourPersonality(...):actions is <%d>.\n",
           actions);
-  
+
   if (actions & _UA_SEARCH_PHASE) {
     fprintf(stderr, "ourPersonality(...):In search phase.\n");
   }
@@ -812,15 +812,15 @@ _Unwind_Reason_Code ourPersonality(int version,
     fprintf(stderr, "ourPersonality(...):In non-search phase.\n");
   }
 #endif
-  
+
   const uint8_t *lsda = _Unwind_GetLanguageSpecificData(context);
-  
+
 #ifdef DEBUG
-  fprintf(stderr, 
+  fprintf(stderr,
           "ourPersonality(...):lsda = <%p>.\n",
           lsda);
 #endif
-  
+
   // The real work of the personality function is captured here
   return(handleLsda(version,
                     lsda,
@@ -841,12 +841,12 @@ _Unwind_Reason_Code ourPersonality(int version,
 uint64_t genClass(const unsigned char classChars[], size_t classCharsSize)
 {
   uint64_t ret = classChars[0];
-  
+
   for (unsigned i = 1; i < classCharsSize; ++i) {
     ret <<= 8;
     ret += classChars[i];
   }
-  
+
   return(ret);
 }
 
@@ -865,37 +865,37 @@ uint64_t genClass(const unsigned char classChars[], size_t classCharsSize)
 /// @param module code for module instance
 /// @param builder builder instance
 /// @param toPrint string to print
-/// @param useGlobal A value of true (default) indicates a GlobalValue is 
-///        generated, and is used to hold the constant string. A value of 
-///        false indicates that the constant string will be stored on the 
+/// @param useGlobal A value of true (default) indicates a GlobalValue is
+///        generated, and is used to hold the constant string. A value of
+///        false indicates that the constant string will be stored on the
 ///        stack.
-void generateStringPrint(llvm::LLVMContext &context, 
+void generateStringPrint(llvm::LLVMContext &context,
                          llvm::Module &module,
-                         llvm::IRBuilder<> &builder, 
+                         llvm::IRBuilder<> &builder,
                          std::string toPrint,
                          bool useGlobal = true) {
   llvm::Function *printFunct = module.getFunction("printStr");
-  
+
   llvm::Value *stringVar;
-  llvm::Constant *stringConstant = 
+  llvm::Constant *stringConstant =
   llvm::ConstantDataArray::getString(context, toPrint);
-  
+
   if (useGlobal) {
     // Note: Does not work without allocation
-    stringVar = 
-    new llvm::GlobalVariable(module, 
+    stringVar =
+    new llvm::GlobalVariable(module,
                              stringConstant->getType(),
-                             true, 
-                             llvm::GlobalValue::LinkerPrivateLinkage, 
-                             stringConstant, 
+                             true,
+                             llvm::GlobalValue::LinkerPrivateLinkage,
+                             stringConstant,
                              "");
   }
   else {
     stringVar = builder.CreateAlloca(stringConstant->getType());
     builder.CreateStore(stringConstant, stringVar);
   }
-  
-  llvm::Value *cast = builder.CreatePointerCast(stringVar, 
+
+  llvm::Value *cast = builder.CreatePointerCast(stringVar,
                                                 builder.getInt8PtrTy());
   builder.CreateCall(printFunct, cast);
 }
@@ -909,49 +909,49 @@ void generateStringPrint(llvm::LLVMContext &context,
 /// @param printFunct function used to "print" integer
 /// @param toPrint string to print
 /// @param format printf like formating string for print
-/// @param useGlobal A value of true (default) indicates a GlobalValue is 
-///        generated, and is used to hold the constant string. A value of 
-///        false indicates that the constant string will be stored on the 
+/// @param useGlobal A value of true (default) indicates a GlobalValue is
+///        generated, and is used to hold the constant string. A value of
+///        false indicates that the constant string will be stored on the
 ///        stack.
-void generateIntegerPrint(llvm::LLVMContext &context, 
+void generateIntegerPrint(llvm::LLVMContext &context,
                           llvm::Module &module,
-                          llvm::IRBuilder<> &builder, 
+                          llvm::IRBuilder<> &builder,
                           llvm::Function &printFunct,
                           llvm::Value &toPrint,
-                          std::string format, 
+                          std::string format,
                           bool useGlobal = true) {
   llvm::Constant *stringConstant =
     llvm::ConstantDataArray::getString(context, format);
   llvm::Value *stringVar;
-  
+
   if (useGlobal) {
     // Note: Does not seem to work without allocation
-    stringVar = 
-    new llvm::GlobalVariable(module, 
+    stringVar =
+    new llvm::GlobalVariable(module,
                              stringConstant->getType(),
-                             true, 
-                             llvm::GlobalValue::LinkerPrivateLinkage, 
-                             stringConstant, 
+                             true,
+                             llvm::GlobalValue::LinkerPrivateLinkage,
+                             stringConstant,
                              "");
   }
   else {
     stringVar = builder.CreateAlloca(stringConstant->getType());
     builder.CreateStore(stringConstant, stringVar);
   }
-  
-  llvm::Value *cast = builder.CreateBitCast(stringVar, 
+
+  llvm::Value *cast = builder.CreateBitCast(stringVar,
                                             builder.getInt8PtrTy());
   builder.CreateCall2(&printFunct, &toPrint, cast);
 }
 
 
-/// Generates code to handle finally block type semantics: always runs 
-/// regardless of whether a thrown exception is passing through or the 
-/// parent function is simply exiting. In addition to printing some state 
-/// to stderr, this code will resume the exception handling--runs the 
-/// unwind resume block, if the exception has not been previously caught 
-/// by a catch clause, and will otherwise execute the end block (terminator 
-/// block). In addition this function creates the corresponding function's 
+/// Generates code to handle finally block type semantics: always runs
+/// regardless of whether a thrown exception is passing through or the
+/// parent function is simply exiting. In addition to printing some state
+/// to stderr, this code will resume the exception handling--runs the
+/// unwind resume block, if the exception has not been previously caught
+/// by a catch clause, and will otherwise execute the end block (terminator
+/// block). In addition this function creates the corresponding function's
 /// stack storage for the exception pointer and catch flag status.
 /// @param context llvm context
 /// @param module code for module instance
@@ -965,9 +965,9 @@ void generateIntegerPrint(llvm::LLVMContext &context,
 /// @param exceptionStorage reference to exception pointer storage
 /// @param caughtResultStorage reference to landingpad result storage
 /// @returns newly created block
-static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context, 
-                                            llvm::Module &module, 
-                                            llvm::IRBuilder<> &builder, 
+static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
+                                            llvm::Module &module,
+                                            llvm::IRBuilder<> &builder,
                                             llvm::Function &toAddTo,
                                             std::string &blockName,
                                             std::string &functionId,
@@ -976,21 +976,21 @@ static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
                                             llvm::Value **exceptionCaughtFlag,
                                             llvm::Value **exceptionStorage,
                                             llvm::Value **caughtResultStorage) {
-  assert(exceptionCaughtFlag && 
+  assert(exceptionCaughtFlag &&
          "ExceptionDemo::createFinallyBlock(...):exceptionCaughtFlag "
          "is NULL");
-  assert(exceptionStorage && 
+  assert(exceptionStorage &&
          "ExceptionDemo::createFinallyBlock(...):exceptionStorage "
          "is NULL");
-  assert(caughtResultStorage && 
+  assert(caughtResultStorage &&
          "ExceptionDemo::createFinallyBlock(...):caughtResultStorage "
          "is NULL");
-  
+
   *exceptionCaughtFlag = createEntryBlockAlloca(toAddTo,
                                          "exceptionCaught",
                                          ourExceptionNotThrownState->getType(),
                                          ourExceptionNotThrownState);
-  
+
   llvm::PointerType *exceptionStorageType = builder.getInt8PtrTy();
   *exceptionStorage = createEntryBlockAlloca(toAddTo,
                                              "exceptionStorage",
@@ -1002,35 +1002,35 @@ static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
                                               ourCaughtResultType,
                                               llvm::ConstantAggregateZero::get(
                                                 ourCaughtResultType));
-  
+
   llvm::BasicBlock *ret = llvm::BasicBlock::Create(context,
                                                    blockName,
                                                    &toAddTo);
-  
+
   builder.SetInsertPoint(ret);
-  
+
   std::ostringstream bufferToPrint;
   bufferToPrint << "Gen: Executing finally block "
     << blockName << " in " << functionId << "\n";
-  generateStringPrint(context, 
-                      module, 
-                      builder, 
+  generateStringPrint(context,
+                      module,
+                      builder,
                       bufferToPrint.str(),
                       USE_GLOBAL_STR_CONSTS);
-  
+
   llvm::SwitchInst *theSwitch = builder.CreateSwitch(builder.CreateLoad(
-                                                       *exceptionCaughtFlag), 
+                                                       *exceptionCaughtFlag),
                                                      &terminatorBlock,
                                                      2);
   theSwitch->addCase(ourExceptionCaughtState, &terminatorBlock);
   theSwitch->addCase(ourExceptionThrownState, &unwindResumeBlock);
-  
+
   return(ret);
 }
 
 
 /// Generates catch block semantics which print a string to indicate type of
-/// catch executed, sets an exception caught flag, and executes passed in 
+/// catch executed, sets an exception caught flag, and executes passed in
 /// end block (terminator block).
 /// @param context llvm context
 /// @param module code for module instance
@@ -1041,52 +1041,52 @@ static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
 /// @param terminatorBlock terminator "end" block
 /// @param exceptionCaughtFlag exception caught/thrown status
 /// @returns newly created block
-static llvm::BasicBlock *createCatchBlock(llvm::LLVMContext &context, 
-                                          llvm::Module &module, 
-                                          llvm::IRBuilder<> &builder, 
+static llvm::BasicBlock *createCatchBlock(llvm::LLVMContext &context,
+                                          llvm::Module &module,
+                                          llvm::IRBuilder<> &builder,
                                           llvm::Function &toAddTo,
                                           std::string &blockName,
                                           std::string &functionId,
                                           llvm::BasicBlock &terminatorBlock,
                                           llvm::Value &exceptionCaughtFlag) {
-  
+
   llvm::BasicBlock *ret = llvm::BasicBlock::Create(context,
                                                    blockName,
                                                    &toAddTo);
-  
+
   builder.SetInsertPoint(ret);
-  
+
   std::ostringstream bufferToPrint;
   bufferToPrint << "Gen: Executing catch block "
   << blockName
   << " in "
   << functionId
   << std::endl;
-  generateStringPrint(context, 
-                      module, 
-                      builder, 
+  generateStringPrint(context,
+                      module,
+                      builder,
                       bufferToPrint.str(),
                       USE_GLOBAL_STR_CONSTS);
   builder.CreateStore(ourExceptionCaughtState, &exceptionCaughtFlag);
   builder.CreateBr(&terminatorBlock);
-  
+
   return(ret);
 }
 
 
-/// Generates a function which invokes a function (toInvoke) and, whose 
-/// unwind block will "catch" the type info types correspondingly held in the 
-/// exceptionTypesToCatch argument. If the toInvoke function throws an 
-/// exception which does not match any type info types contained in 
-/// exceptionTypesToCatch, the generated code will call _Unwind_Resume 
-/// with the raised exception. On the other hand the generated code will 
+/// Generates a function which invokes a function (toInvoke) and, whose
+/// unwind block will "catch" the type info types correspondingly held in the
+/// exceptionTypesToCatch argument. If the toInvoke function throws an
+/// exception which does not match any type info types contained in
+/// exceptionTypesToCatch, the generated code will call _Unwind_Resume
+/// with the raised exception. On the other hand the generated code will
 /// normally exit if the toInvoke function does not throw an exception.
-/// The generated "finally" block is always run regardless of the cause of 
+/// The generated "finally" block is always run regardless of the cause of
 /// the generated function exit.
 /// The generated function is returned after being verified.
 /// @param module code for module instance
 /// @param builder builder instance
-/// @param fpm a function pass manager holding optional IR to IR 
+/// @param fpm a function pass manager holding optional IR to IR
 ///        transformations
 /// @param toInvoke inner function to invoke
 /// @param ourId id used to printing purposes
@@ -1094,76 +1094,76 @@ static llvm::BasicBlock *createCatchBlock(llvm::LLVMContext &context,
 /// @param exceptionTypesToCatch array of type info types to "catch"
 /// @returns generated function
 static
-llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module, 
-                                             llvm::IRBuilder<> &builder, 
+llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
+                                             llvm::IRBuilder<> &builder,
                                              llvm::FunctionPassManager &fpm,
                                              llvm::Function &toInvoke,
                                              std::string ourId,
                                              unsigned numExceptionsToCatch,
                                              unsigned exceptionTypesToCatch[]) {
-  
+
   llvm::LLVMContext &context = module.getContext();
   llvm::Function *toPrint32Int = module.getFunction("print32Int");
-  
+
   ArgTypes argTypes;
   argTypes.push_back(builder.getInt32Ty());
-  
+
   ArgNames argNames;
   argNames.push_back("exceptTypeToThrow");
-  
-  llvm::Function *ret = createFunction(module, 
+
+  llvm::Function *ret = createFunction(module,
                                        builder.getVoidTy(),
-                                       argTypes, 
-                                       argNames, 
+                                       argTypes,
+                                       argNames,
                                        ourId,
-                                       llvm::Function::ExternalLinkage, 
-                                       false, 
+                                       llvm::Function::ExternalLinkage,
+                                       false,
                                        false);
-  
+
   // Block which calls invoke
   llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context,
-                                                          "entry", 
+                                                          "entry",
                                                           ret);
   // Normal block for invoke
-  llvm::BasicBlock *normalBlock = llvm::BasicBlock::Create(context, 
-                                                           "normal", 
+  llvm::BasicBlock *normalBlock = llvm::BasicBlock::Create(context,
+                                                           "normal",
                                                            ret);
   // Unwind block for invoke
-  llvm::BasicBlock *exceptionBlock = llvm::BasicBlock::Create(context, 
-                                                              "exception", 
+  llvm::BasicBlock *exceptionBlock = llvm::BasicBlock::Create(context,
+                                                              "exception",
                                                               ret);
-  
+
   // Block which routes exception to correct catch handler block
-  llvm::BasicBlock *exceptionRouteBlock = llvm::BasicBlock::Create(context, 
-                                                             "exceptionRoute", 
+  llvm::BasicBlock *exceptionRouteBlock = llvm::BasicBlock::Create(context,
+                                                             "exceptionRoute",
                                                              ret);
-  
+
   // Foreign exception handler
-  llvm::BasicBlock *externalExceptionBlock = llvm::BasicBlock::Create(context, 
-                                                          "externalException", 
+  llvm::BasicBlock *externalExceptionBlock = llvm::BasicBlock::Create(context,
+                                                          "externalException",
                                                           ret);
-  
+
   // Block which calls _Unwind_Resume
-  llvm::BasicBlock *unwindResumeBlock = llvm::BasicBlock::Create(context, 
-                                                               "unwindResume", 
+  llvm::BasicBlock *unwindResumeBlock = llvm::BasicBlock::Create(context,
+                                                               "unwindResume",
                                                                ret);
-  
+
   // Clean up block which delete exception if needed
   llvm::BasicBlock *endBlock = llvm::BasicBlock::Create(context, "end", ret);
-  
+
   std::string nextName;
   std::vector<llvm::BasicBlock*> catchBlocks(numExceptionsToCatch);
   llvm::Value *exceptionCaughtFlag = NULL;
   llvm::Value *exceptionStorage = NULL;
   llvm::Value *caughtResultStorage = NULL;
-  
-  // Finally block which will branch to unwindResumeBlock if 
+
+  // Finally block which will branch to unwindResumeBlock if
   // exception is not caught. Initializes/allocates stack locations.
-  llvm::BasicBlock *finallyBlock = createFinallyBlock(context, 
-                                                      module, 
-                                                      builder, 
-                                                      *ret, 
-                                                      nextName = "finally", 
+  llvm::BasicBlock *finallyBlock = createFinallyBlock(context,
+                                                      module,
+                                                      builder,
+                                                      *ret,
+                                                      nextName = "finally",
                                                       ourId,
                                                       *endBlock,
                                                       *unwindResumeBlock,
@@ -1171,74 +1171,74 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
                                                       &exceptionStorage,
                                                       &caughtResultStorage
                                                       );
-  
+
   for (unsigned i = 0; i < numExceptionsToCatch; ++i) {
     nextName = ourTypeInfoNames[exceptionTypesToCatch[i]];
-    
+
     // One catch block per type info to be caught
-    catchBlocks[i] = createCatchBlock(context, 
-                                      module, 
-                                      builder, 
+    catchBlocks[i] = createCatchBlock(context,
+                                      module,
+                                      builder,
                                       *ret,
-                                      nextName, 
+                                      nextName,
                                       ourId,
                                       *finallyBlock,
                                       *exceptionCaughtFlag);
   }
-  
+
   // Entry Block
-  
+
   builder.SetInsertPoint(entryBlock);
-  
+
   std::vector<llvm::Value*> args;
   args.push_back(namedValues["exceptTypeToThrow"]);
-  builder.CreateInvoke(&toInvoke, 
-                       normalBlock, 
-                       exceptionBlock, 
+  builder.CreateInvoke(&toInvoke,
+                       normalBlock,
+                       exceptionBlock,
                        args);
-  
+
   // End Block
-  
+
   builder.SetInsertPoint(endBlock);
-  
-  generateStringPrint(context, 
+
+  generateStringPrint(context,
                       module,
-                      builder, 
+                      builder,
                       "Gen: In end block: exiting in " + ourId + ".\n",
                       USE_GLOBAL_STR_CONSTS);
   llvm::Function *deleteOurException = module.getFunction("deleteOurException");
-  
+
   // Note: function handles NULL exceptions
-  builder.CreateCall(deleteOurException, 
+  builder.CreateCall(deleteOurException,
                      builder.CreateLoad(exceptionStorage));
   builder.CreateRetVoid();
-  
+
   // Normal Block
-  
+
   builder.SetInsertPoint(normalBlock);
-  
-  generateStringPrint(context, 
+
+  generateStringPrint(context,
                       module,
-                      builder, 
+                      builder,
                       "Gen: No exception in " + ourId + "!\n",
                       USE_GLOBAL_STR_CONSTS);
-  
+
   // Finally block is always called
   builder.CreateBr(finallyBlock);
-  
+
   // Unwind Resume Block
-  
+
   builder.SetInsertPoint(unwindResumeBlock);
-  
+
   builder.CreateResume(builder.CreateLoad(caughtResultStorage));
-  
+
   // Exception Block
-  
+
   builder.SetInsertPoint(exceptionBlock);
-  
+
   llvm::Function *personality = module.getFunction("ourPersonality");
-  
-  llvm::LandingPadInst *caughtResult = 
+
+  llvm::LandingPadInst *caughtResult =
     builder.CreateLandingPad(ourCaughtResultType,
                              personality,
                              numExceptionsToCatch,
@@ -1255,48 +1255,48 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
   llvm::Value *unwindException = builder.CreateExtractValue(caughtResult, 0);
   llvm::Value *retTypeInfoIndex = builder.CreateExtractValue(caughtResult, 1);
 
-  // FIXME: Redundant storage which, beyond utilizing value of 
-  //        caughtResultStore for unwindException storage, may be alleviated 
+  // FIXME: Redundant storage which, beyond utilizing value of
+  //        caughtResultStore for unwindException storage, may be alleviated
   //        altogether with a block rearrangement
   builder.CreateStore(caughtResult, caughtResultStorage);
   builder.CreateStore(unwindException, exceptionStorage);
   builder.CreateStore(ourExceptionThrownState, exceptionCaughtFlag);
-  
-  // Retrieve exception_class member from thrown exception 
+
+  // Retrieve exception_class member from thrown exception
   // (_Unwind_Exception instance). This member tells us whether or not
   // the exception is foreign.
-  llvm::Value *unwindExceptionClass = 
+  llvm::Value *unwindExceptionClass =
     builder.CreateLoad(builder.CreateStructGEP(
-             builder.CreatePointerCast(unwindException, 
-                                       ourUnwindExceptionType->getPointerTo()), 
+             builder.CreatePointerCast(unwindException,
+                                       ourUnwindExceptionType->getPointerTo()),
                                                0));
-  
+
   // Branch to the externalExceptionBlock if the exception is foreign or
   // to a catch router if not. Either way the finally block will be run.
   builder.CreateCondBr(builder.CreateICmpEQ(unwindExceptionClass,
-                            llvm::ConstantInt::get(builder.getInt64Ty(), 
+                            llvm::ConstantInt::get(builder.getInt64Ty(),
                                                    ourBaseExceptionClass)),
                        exceptionRouteBlock,
                        externalExceptionBlock);
-  
+
   // External Exception Block
-  
+
   builder.SetInsertPoint(externalExceptionBlock);
-  
-  generateStringPrint(context, 
+
+  generateStringPrint(context,
                       module,
-                      builder, 
+                      builder,
                       "Gen: Foreign exception received.\n",
                       USE_GLOBAL_STR_CONSTS);
-  
+
   // Branch to the finally block
   builder.CreateBr(finallyBlock);
-  
+
   // Exception Route Block
-  
+
   builder.SetInsertPoint(exceptionRouteBlock);
-  
-  // Casts exception pointer (_Unwind_Exception instance) to parent 
+
+  // Casts exception pointer (_Unwind_Exception instance) to parent
   // (OurException instance).
   //
   // Note: ourBaseFromUnwindOffset is usually negative
@@ -1304,34 +1304,34 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
                                   builder.CreateConstGEP1_64(unwindException,
                                                        ourBaseFromUnwindOffset),
                                   ourExceptionType->getPointerTo());
-  
+
   // Retrieve thrown exception type info type
   //
   // Note: Index is not relative to pointer but instead to structure
   //       unlike a true getelementptr (GEP) instruction
   typeInfoThrown = builder.CreateStructGEP(typeInfoThrown, 0);
-  
-  llvm::Value *typeInfoThrownType = 
+
+  llvm::Value *typeInfoThrownType =
   builder.CreateStructGEP(typeInfoThrown, 0);
-  
-  generateIntegerPrint(context, 
+
+  generateIntegerPrint(context,
                        module,
-                       builder, 
-                       *toPrint32Int, 
+                       builder,
+                       *toPrint32Int,
                        *(builder.CreateLoad(typeInfoThrownType)),
-                       "Gen: Exception type <%d> received (stack unwound) " 
-                       " in " + 
-                       ourId + 
+                       "Gen: Exception type <%d> received (stack unwound) "
+                       " in " +
+                       ourId +
                        ".\n",
                        USE_GLOBAL_STR_CONSTS);
-  
+
   // Route to matched type info catch block or run cleanup finally block
-  llvm::SwitchInst *switchToCatchBlock = builder.CreateSwitch(retTypeInfoIndex, 
-                                                          finallyBlock, 
+  llvm::SwitchInst *switchToCatchBlock = builder.CreateSwitch(retTypeInfoIndex,
+                                                          finallyBlock,
                                                           numExceptionsToCatch);
-  
+
   unsigned nextTypeToCatch;
-  
+
   for (unsigned i = 1; i <= numExceptionsToCatch; ++i) {
     nextTypeToCatch = i - 1;
     switchToCatchBlock->addCase(llvm::ConstantInt::get(
@@ -1341,18 +1341,18 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
 
   llvm::verifyFunction(*ret);
   fpm.run(*ret);
-  
+
   return(ret);
 }
 
 
 /// Generates function which throws either an exception matched to a runtime
-/// determined type info type (argument to generated function), or if this 
-/// runtime value matches nativeThrowType, throws a foreign exception by 
+/// determined type info type (argument to generated function), or if this
+/// runtime value matches nativeThrowType, throws a foreign exception by
 /// calling nativeThrowFunct.
 /// @param module code for module instance
 /// @param builder builder instance
-/// @param fpm a function pass manager holding optional IR to IR 
+/// @param fpm a function pass manager holding optional IR to IR
 ///        transformations
 /// @param ourId id used to printing purposes
 /// @param nativeThrowType a runtime argument of this value results in
@@ -1361,8 +1361,8 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
 ///        if the above nativeThrowType matches generated function's arg.
 /// @returns generated function
 static
-llvm::Function *createThrowExceptionFunction(llvm::Module &module, 
-                                             llvm::IRBuilder<> &builder, 
+llvm::Function *createThrowExceptionFunction(llvm::Module &module,
+                                             llvm::IRBuilder<> &builder,
                                              llvm::FunctionPassManager &fpm,
                                              std::string ourId,
                                              int32_t nativeThrowType,
@@ -1373,7 +1373,7 @@ llvm::Function *createThrowExceptionFunction(llvm::Module &module,
   unwindArgTypes.push_back(builder.getInt32Ty());
   ArgNames unwindArgNames;
   unwindArgNames.push_back("exceptTypeToThrow");
-  
+
   llvm::Function *ret = createFunction(module,
                                        builder.getVoidTy(),
                                        unwindArgTypes,
@@ -1382,88 +1382,88 @@ llvm::Function *createThrowExceptionFunction(llvm::Module &module,
                                        llvm::Function::ExternalLinkage,
                                        false,
                                        false);
-  
+
   // Throws either one of our exception or a native C++ exception depending
   // on a runtime argument value containing a type info type.
   llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context,
-                                                          "entry", 
+                                                          "entry",
                                                           ret);
   // Throws a foreign exception
   llvm::BasicBlock *nativeThrowBlock = llvm::BasicBlock::Create(context,
-                                                                "nativeThrow", 
+                                                                "nativeThrow",
                                                                 ret);
   // Throws one of our Exceptions
   llvm::BasicBlock *generatedThrowBlock = llvm::BasicBlock::Create(context,
-                                                             "generatedThrow", 
+                                                             "generatedThrow",
                                                              ret);
   // Retrieved runtime type info type to throw
   llvm::Value *exceptionType = namedValues["exceptTypeToThrow"];
-  
+
   // nativeThrowBlock block
-  
+
   builder.SetInsertPoint(nativeThrowBlock);
-  
+
   // Throws foreign exception
   builder.CreateCall(&nativeThrowFunct, exceptionType);
   builder.CreateUnreachable();
-  
+
   // entry block
-  
+
   builder.SetInsertPoint(entryBlock);
-  
+
   llvm::Function *toPrint32Int = module.getFunction("print32Int");
-  generateIntegerPrint(context, 
+  generateIntegerPrint(context,
                        module,
-                       builder, 
-                       *toPrint32Int, 
-                       *exceptionType, 
-                       "\nGen: About to throw exception type <%d> in " + 
-                       ourId + 
+                       builder,
+                       *toPrint32Int,
+                       *exceptionType,
+                       "\nGen: About to throw exception type <%d> in " +
+                       ourId +
                        ".\n",
                        USE_GLOBAL_STR_CONSTS);
-  
+
   // Switches on runtime type info type value to determine whether or not
-  // a foreign exception is thrown. Defaults to throwing one of our 
+  // a foreign exception is thrown. Defaults to throwing one of our
   // generated exceptions.
   llvm::SwitchInst *theSwitch = builder.CreateSwitch(exceptionType,
                                                      generatedThrowBlock,
                                                      1);
-  
-  theSwitch->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), 
+
+  theSwitch->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(context),
                                             nativeThrowType),
                      nativeThrowBlock);
-  
+
   // generatedThrow block
-  
+
   builder.SetInsertPoint(generatedThrowBlock);
-  
+
   llvm::Function *createOurException = module.getFunction("createOurException");
   llvm::Function *raiseOurException = module.getFunction(
                                         "_Unwind_RaiseException");
-  
+
   // Creates exception to throw with runtime type info type.
-  llvm::Value *exception = builder.CreateCall(createOurException, 
+  llvm::Value *exception = builder.CreateCall(createOurException,
                                               namedValues["exceptTypeToThrow"]);
-  
+
   // Throw generated Exception
   builder.CreateCall(raiseOurException, exception);
   builder.CreateUnreachable();
-  
+
   llvm::verifyFunction(*ret);
   fpm.run(*ret);
-  
+
   return(ret);
 }
 
 static void createStandardUtilityFunctions(unsigned numTypeInfos,
-                                           llvm::Module &module, 
+                                           llvm::Module &module,
                                            llvm::IRBuilder<> &builder);
 
-/// Creates test code by generating and organizing these functions into the 
+/// Creates test code by generating and organizing these functions into the
 /// test case. The test case consists of an outer function setup to invoke
-/// an inner function within an environment having multiple catch and single 
+/// an inner function within an environment having multiple catch and single
 /// finally blocks. This inner function is also setup to invoke a throw
-/// function within an evironment similar in nature to the outer function's 
+/// function within an evironment similar in nature to the outer function's
 /// catch and finally blocks. Each of these two functions catch mutually
 /// exclusive subsets (even or odd) of the type info types configured
 /// for this this. All generated functions have a runtime argument which
@@ -1474,26 +1474,26 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
 /// a supplied a function which in turn will throw a foreign exception.
 /// @param module code for module instance
 /// @param builder builder instance
-/// @param fpm a function pass manager holding optional IR to IR 
+/// @param fpm a function pass manager holding optional IR to IR
 ///        transformations
 /// @param nativeThrowFunctName name of external function which will throw
 ///        a foreign exception
 /// @returns outermost generated test function.
-llvm::Function *createUnwindExceptionTest(llvm::Module &module, 
-                                          llvm::IRBuilder<> &builder, 
+llvm::Function *createUnwindExceptionTest(llvm::Module &module,
+                                          llvm::IRBuilder<> &builder,
                                           llvm::FunctionPassManager &fpm,
                                           std::string nativeThrowFunctName) {
   // Number of type infos to generate
   unsigned numTypeInfos = 6;
-  
+
   // Initialze intrisics and external functions to use along with exception
   // and type info globals.
   createStandardUtilityFunctions(numTypeInfos,
                                  module,
                                  builder);
   llvm::Function *nativeThrowFunct = module.getFunction(nativeThrowFunctName);
-  
-  // Create exception throw function using the value ~0 to cause 
+
+  // Create exception throw function using the value ~0 to cause
   // foreign exceptions to be thrown.
   llvm::Function *throwFunct = createThrowExceptionFunction(module,
                                                             builder,
@@ -1503,9 +1503,9 @@ llvm::Function *createUnwindExceptionTest(llvm::Module &module,
                                                             *nativeThrowFunct);
   // Inner function will catch even type infos
   unsigned innerExceptionTypesToCatch[] = {6, 2, 4};
-  size_t numExceptionTypesToCatch = sizeof(innerExceptionTypesToCatch) / 
+  size_t numExceptionTypesToCatch = sizeof(innerExceptionTypesToCatch) /
                                     sizeof(unsigned);
-  
+
   // Generate inner function.
   llvm::Function *innerCatchFunct = createCatchWrappedInvokeFunction(module,
                                                     builder,
@@ -1514,12 +1514,12 @@ llvm::Function *createUnwindExceptionTest(llvm::Module &module,
                                                     "innerCatchFunct",
                                                     numExceptionTypesToCatch,
                                                     innerExceptionTypesToCatch);
-  
+
   // Outer function will catch odd type infos
   unsigned outerExceptionTypesToCatch[] = {3, 1, 5};
-  numExceptionTypesToCatch = sizeof(outerExceptionTypesToCatch) / 
+  numExceptionTypesToCatch = sizeof(outerExceptionTypesToCatch) /
   sizeof(unsigned);
-  
+
   // Generate outer function
   llvm::Function *outerCatchFunct = createCatchWrappedInvokeFunction(module,
                                                     builder,
@@ -1528,7 +1528,7 @@ llvm::Function *createUnwindExceptionTest(llvm::Module &module,
                                                     "outerCatchFunct",
                                                     numExceptionTypesToCatch,
                                                     outerExceptionTypesToCatch);
-  
+
   // Return outer function to run
   return(outerCatchFunct);
 }
@@ -1539,15 +1539,15 @@ class OurCppRunException : public std::runtime_error {
 public:
   OurCppRunException(const std::string reason) :
   std::runtime_error(reason) {}
-  
+
   OurCppRunException (const OurCppRunException &toCopy) :
   std::runtime_error(toCopy) {}
-  
+
   OurCppRunException &operator = (const OurCppRunException &toCopy) {
     return(reinterpret_cast<OurCppRunException&>(
                                  std::runtime_error::operator=(toCopy)));
   }
-  
+
   ~OurCppRunException (void) throw () {}
 };
 
@@ -1562,7 +1562,7 @@ void throwCppException (int32_t ignoreIt) {
 
 typedef void (*OurExceptionThrowFunctType) (int32_t typeToThrow);
 
-/// This is a test harness which runs test by executing generated 
+/// This is a test harness which runs test by executing generated
 /// function with a type info type to throw. Harness wraps the execution
 /// of generated function in a C++ try catch clause.
 /// @param engine execution engine to use for executing generated function.
@@ -1572,15 +1572,15 @@ typedef void (*OurExceptionThrowFunctType) (int32_t typeToThrow);
 /// @param typeToThrow type info type of generated exception to throw, or
 ///        indicator to cause foreign exception to be thrown.
 static
-void runExceptionThrow(llvm::ExecutionEngine *engine, 
-                       llvm::Function *function, 
+void runExceptionThrow(llvm::ExecutionEngine *engine,
+                       llvm::Function *function,
                        int32_t typeToThrow) {
-  
+
   // Find test's function pointer
-  OurExceptionThrowFunctType functPtr = 
+  OurExceptionThrowFunctType functPtr =
     reinterpret_cast<OurExceptionThrowFunctType>(
        reinterpret_cast<intptr_t>(engine->getPointerToFunction(function)));
-  
+
   try {
     // Run test
     (*functPtr)(typeToThrow);
@@ -1589,15 +1589,15 @@ void runExceptionThrow(llvm::ExecutionEngine *engine,
     // Catch foreign C++ exception
     fprintf(stderr,
             "\nrunExceptionThrow(...):In C++ catch OurCppRunException "
-            "with reason: %s.\n", 
+            "with reason: %s.\n",
             exc.what());
   }
   catch (...) {
-    // Catch all exceptions including our generated ones. This latter 
+    // Catch all exceptions including our generated ones. This latter
     // functionality works according to the example in rules 1.6.4 of
-    // http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22), 
-    // given that these will be exceptions foreign to C++ 
-    // (the _Unwind_Exception::exception_class should be different from 
+    // http://sourcery.mentor.com/public/cxx-abi/abi-eh.html (v1.22),
+    // given that these will be exceptions foreign to C++
+    // (the _Unwind_Exception::exception_class should be different from
     // the one used by C++).
     fprintf(stderr,
             "\nrunExceptionThrow(...):In C++ catch all.\n");
@@ -1610,32 +1610,32 @@ void runExceptionThrow(llvm::ExecutionEngine *engine,
 
 typedef llvm::ArrayRef<llvm::Type*> TypeArray;
 
-/// This initialization routine creates type info globals and 
+/// This initialization routine creates type info globals and
 /// adds external function declarations to module.
 /// @param numTypeInfos number of linear type info associated type info types
 ///        to create as GlobalVariable instances, starting with the value 1.
 /// @param module code for module instance
 /// @param builder builder instance
 static void createStandardUtilityFunctions(unsigned numTypeInfos,
-                                           llvm::Module &module, 
+                                           llvm::Module &module,
                                            llvm::IRBuilder<> &builder) {
-  
+
   llvm::LLVMContext &context = module.getContext();
-  
+
   // Exception initializations
-  
+
   // Setup exception catch state
-  ourExceptionNotThrownState = 
+  ourExceptionNotThrownState =
     llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 0),
-  ourExceptionThrownState = 
+  ourExceptionThrownState =
     llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 1),
-  ourExceptionCaughtState = 
+  ourExceptionCaughtState =
     llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 2),
-  
-  
-  
+
+
+
   // Create our type info type
-  ourTypeInfoType = llvm::StructType::get(context, 
+  ourTypeInfoType = llvm::StructType::get(context,
                                           TypeArray(builder.getInt32Ty()));
 
   llvm::Type *caughtResultFieldTypes[] = {
@@ -1648,47 +1648,47 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
                                             TypeArray(caughtResultFieldTypes));
 
   // Create OurException type
-  ourExceptionType = llvm::StructType::get(context, 
+  ourExceptionType = llvm::StructType::get(context,
                                            TypeArray(ourTypeInfoType));
-  
+
   // Create portion of _Unwind_Exception type
   //
   // Note: Declaring only a portion of the _Unwind_Exception struct.
   //       Does this cause problems?
   ourUnwindExceptionType =
-    llvm::StructType::get(context, 
+    llvm::StructType::get(context,
                     TypeArray(builder.getInt64Ty()));
 
   struct OurBaseException_t dummyException;
-  
+
   // Calculate offset of OurException::unwindException member.
-  ourBaseFromUnwindOffset = ((uintptr_t) &dummyException) - 
+  ourBaseFromUnwindOffset = ((uintptr_t) &dummyException) -
                             ((uintptr_t) &(dummyException.unwindException));
-  
+
 #ifdef DEBUG
   fprintf(stderr,
           "createStandardUtilityFunctions(...):ourBaseFromUnwindOffset "
           "= %lld, sizeof(struct OurBaseException_t) - "
           "sizeof(struct _Unwind_Exception) = %lu.\n",
           ourBaseFromUnwindOffset,
-          sizeof(struct OurBaseException_t) - 
+          sizeof(struct OurBaseException_t) -
           sizeof(struct _Unwind_Exception));
 #endif
-  
+
   size_t numChars = sizeof(ourBaseExcpClassChars) / sizeof(char);
-  
+
   // Create our _Unwind_Exception::exception_class value
   ourBaseExceptionClass = genClass(ourBaseExcpClassChars, numChars);
-  
+
   // Type infos
-  
+
   std::string baseStr = "typeInfo", typeInfoName;
   std::ostringstream typeInfoNameBuilder;
   std::vector<llvm::Constant*> structVals;
-  
+
   llvm::Constant *nextStruct;
   llvm::GlobalVariable *nextGlobal = NULL;
-  
+
   // Generate each type info
   //
   // Note: First type info is not used.
@@ -1696,202 +1696,202 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
     structVals.clear();
     structVals.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), i));
     nextStruct = llvm::ConstantStruct::get(ourTypeInfoType, structVals);
-    
+
     typeInfoNameBuilder.str("");
     typeInfoNameBuilder << baseStr << i;
     typeInfoName = typeInfoNameBuilder.str();
-    
+
     // Note: Does not seem to work without allocation
-    nextGlobal = 
-    new llvm::GlobalVariable(module, 
-                             ourTypeInfoType, 
-                             true, 
-                             llvm::GlobalValue::ExternalLinkage, 
-                             nextStruct, 
+    nextGlobal =
+    new llvm::GlobalVariable(module,
+                             ourTypeInfoType,
+                             true,
+                             llvm::GlobalValue::ExternalLinkage,
+                             nextStruct,
                              typeInfoName);
-    
+
     ourTypeInfoNames.push_back(typeInfoName);
     ourTypeInfoNamesIndex[i] = typeInfoName;
   }
-  
+
   ArgNames argNames;
   ArgTypes argTypes;
   llvm::Function *funct = NULL;
-  
+
   // print32Int
-  
+
   llvm::Type *retType = builder.getVoidTy();
-  
+
   argTypes.clear();
   argTypes.push_back(builder.getInt32Ty());
   argTypes.push_back(builder.getInt8PtrTy());
-  
+
   argNames.clear();
-  
-  createFunction(module, 
-                 retType, 
-                 argTypes, 
-                 argNames, 
-                 "print32Int", 
-                 llvm::Function::ExternalLinkage, 
-                 true, 
+
+  createFunction(module,
+                 retType,
+                 argTypes,
+                 argNames,
+                 "print32Int",
+                 llvm::Function::ExternalLinkage,
+                 true,
                  false);
-  
+
   // print64Int
-  
+
   retType = builder.getVoidTy();
-  
+
   argTypes.clear();
   argTypes.push_back(builder.getInt64Ty());
   argTypes.push_back(builder.getInt8PtrTy());
-  
+
   argNames.clear();
-  
-  createFunction(module, 
-                 retType, 
-                 argTypes, 
-                 argNames, 
-                 "print64Int", 
-                 llvm::Function::ExternalLinkage, 
-                 true, 
+
+  createFunction(module,
+                 retType,
+                 argTypes,
+                 argNames,
+                 "print64Int",
+                 llvm::Function::ExternalLinkage,
+                 true,
                  false);
-  
+
   // printStr
-  
+
   retType = builder.getVoidTy();
-  
+
   argTypes.clear();
   argTypes.push_back(builder.getInt8PtrTy());
-  
+
   argNames.clear();
-  
-  createFunction(module, 
-                 retType, 
-                 argTypes, 
-                 argNames, 
-                 "printStr", 
-                 llvm::Function::ExternalLinkage, 
-                 true, 
+
+  createFunction(module,
+                 retType,
+                 argTypes,
+                 argNames,
+                 "printStr",
+                 llvm::Function::ExternalLinkage,
+                 true,
                  false);
-  
+
   // throwCppException
-  
+
   retType = builder.getVoidTy();
-  
+
   argTypes.clear();
   argTypes.push_back(builder.getInt32Ty());
-  
+
   argNames.clear();
-  
-  createFunction(module, 
-                 retType, 
-                 argTypes, 
-                 argNames, 
-                 "throwCppException", 
-                 llvm::Function::ExternalLinkage, 
-                 true, 
+
+  createFunction(module,
+                 retType,
+                 argTypes,
+                 argNames,
+                 "throwCppException",
+                 llvm::Function::ExternalLinkage,
+                 true,
                  false);
-  
+
   // deleteOurException
-  
+
   retType = builder.getVoidTy();
-  
+
   argTypes.clear();
   argTypes.push_back(builder.getInt8PtrTy());
-  
+
   argNames.clear();
-  
-  createFunction(module, 
-                 retType, 
-                 argTypes, 
-                 argNames, 
-                 "deleteOurException", 
-                 llvm::Function::ExternalLinkage, 
-                 true, 
+
+  createFunction(module,
+                 retType,
+                 argTypes,
+                 argNames,
+                 "deleteOurException",
+                 llvm::Function::ExternalLinkage,
+                 true,
                  false);
-  
+
   // createOurException
-  
+
   retType = builder.getInt8PtrTy();
-  
+
   argTypes.clear();
   argTypes.push_back(builder.getInt32Ty());
-  
+
   argNames.clear();
-  
-  createFunction(module, 
-                 retType, 
-                 argTypes, 
-                 argNames, 
-                 "createOurException", 
-                 llvm::Function::ExternalLinkage, 
-                 true, 
+
+  createFunction(module,
+                 retType,
+                 argTypes,
+                 argNames,
+                 "createOurException",
+                 llvm::Function::ExternalLinkage,
+                 true,
                  false);
-  
+
   // _Unwind_RaiseException
-  
+
   retType = builder.getInt32Ty();
-  
+
   argTypes.clear();
   argTypes.push_back(builder.getInt8PtrTy());
-  
+
   argNames.clear();
-  
-  funct = createFunction(module, 
-                         retType, 
-                         argTypes, 
-                         argNames, 
-                         "_Unwind_RaiseException", 
-                         llvm::Function::ExternalLinkage, 
-                         true, 
+
+  funct = createFunction(module,
+                         retType,
+                         argTypes,
+                         argNames,
+                         "_Unwind_RaiseException",
+                         llvm::Function::ExternalLinkage,
+                         true,
                          false);
-  
-  funct->addFnAttr(llvm::Attribute::NoReturn);
-  
+
+  funct->setDoesNotReturn();
+
   // _Unwind_Resume
-  
+
   retType = builder.getInt32Ty();
-  
+
   argTypes.clear();
   argTypes.push_back(builder.getInt8PtrTy());
-  
+
   argNames.clear();
-  
-  funct = createFunction(module, 
-                         retType, 
-                         argTypes, 
-                         argNames, 
-                         "_Unwind_Resume", 
-                         llvm::Function::ExternalLinkage, 
-                         true, 
+
+  funct = createFunction(module,
+                         retType,
+                         argTypes,
+                         argNames,
+                         "_Unwind_Resume",
+                         llvm::Function::ExternalLinkage,
+                         true,
                          false);
-  
-  funct->addFnAttr(llvm::Attribute::NoReturn);
-  
+
+  funct->setDoesNotReturn();
+
   // ourPersonality
-  
+
   retType = builder.getInt32Ty();
-  
+
   argTypes.clear();
   argTypes.push_back(builder.getInt32Ty());
   argTypes.push_back(builder.getInt32Ty());
   argTypes.push_back(builder.getInt64Ty());
   argTypes.push_back(builder.getInt8PtrTy());
   argTypes.push_back(builder.getInt8PtrTy());
-  
+
   argNames.clear();
-  
-  createFunction(module, 
-                 retType, 
-                 argTypes, 
-                 argNames, 
-                 "ourPersonality", 
-                 llvm::Function::ExternalLinkage, 
-                 true, 
+
+  createFunction(module,
+                 retType,
+                 argTypes,
+                 argNames,
+                 "ourPersonality",
+                 llvm::Function::ExternalLinkage,
+                 true,
                  false);
-  
+
   // llvm.eh.typeid.for intrinsic
-  
+
   getDeclaration(&module, llvm::Intrinsic::eh_typeid_for);
 }
 
@@ -1901,7 +1901,7 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
 //===----------------------------------------------------------------------===//
 
 /// Demo main routine which takes the type info types to throw. A test will
-/// be run for each given type info type. While type info types with the value 
+/// be run for each given type info type. While type info types with the value
 /// of -1 will trigger a foreign C++ exception to be thrown; type info types
 /// <= 6 and >= 1 will be caught by test functions; and type info types > 6
 /// will result in exceptions which pass through to the test harness. All other
@@ -1920,87 +1920,86 @@ int main(int argc, char *argv[]) {
             "   for a full test.\n\n");
     return(0);
   }
-  
+
   // If not set, exception handling will not be turned on
   llvm::TargetOptions Opts;
   Opts.JITExceptionHandling = true;
-  
+
   llvm::InitializeNativeTarget();
   llvm::LLVMContext &context = llvm::getGlobalContext();
   llvm::IRBuilder<> theBuilder(context);
-  
+
   // Make the module, which holds all the code.
   llvm::Module *module = new llvm::Module("my cool jit", context);
-  
+
   // Build engine with JIT
   llvm::EngineBuilder factory(module);
   factory.setEngineKind(llvm::EngineKind::JIT);
   factory.setAllocateGVsWithCode(false);
   factory.setTargetOptions(Opts);
   llvm::ExecutionEngine *executionEngine = factory.create();
-  
+
   {
     llvm::FunctionPassManager fpm(module);
-    
-    // Set up the optimizer pipeline.  
+
+    // Set up the optimizer pipeline.
     // Start with registering info about how the
     // target lays out data structures.
     fpm.add(new llvm::DataLayout(*executionEngine->getDataLayout()));
-    
+
     // Optimizations turned on
 #ifdef ADD_OPT_PASSES
-    
+
     // Basic AliasAnslysis support for GVN.
     fpm.add(llvm::createBasicAliasAnalysisPass());
-    
+
     // Promote allocas to registers.
     fpm.add(llvm::createPromoteMemoryToRegisterPass());
-    
+
     // Do simple "peephole" optimizations and bit-twiddling optzns.
     fpm.add(llvm::createInstructionCombiningPass());
-    
+
     // Reassociate expressions.
     fpm.add(llvm::createReassociatePass());
-    
+
     // Eliminate Common SubExpressions.
     fpm.add(llvm::createGVNPass());
-    
-    // Simplify the control flow graph (deleting unreachable 
+
+    // Simplify the control flow graph (deleting unreachable
     // blocks, etc).
     fpm.add(llvm::createCFGSimplificationPass());
 #endif  // ADD_OPT_PASSES
-    
+
     fpm.doInitialization();
-    
+
     // Generate test code using function throwCppException(...) as
     // the function which throws foreign exceptions.
-    llvm::Function *toRun = 
-    createUnwindExceptionTest(*module, 
-                              theBuilder, 
+    llvm::Function *toRun =
+    createUnwindExceptionTest(*module,
+                              theBuilder,
                               fpm,
                               "throwCppException");
-    
+
     fprintf(stderr, "\nBegin module dump:\n\n");
-    
+
     module->dump();
-    
+
     fprintf(stderr, "\nEnd module dump:\n");
-    
+
     fprintf(stderr, "\n\nBegin Test:\n");
-    
+
     for (int i = 1; i < argc; ++i) {
       // Run test for each argument whose value is the exception
       // type to throw.
-      runExceptionThrow(executionEngine, 
-                        toRun, 
+      runExceptionThrow(executionEngine,
+                        toRun,
                         (unsigned) strtoul(argv[i], NULL, 10));
     }
-    
+
     fprintf(stderr, "\nEnd Test:\n\n");
-  } 
-  
+  }
+
   delete executionEngine;
-  
+
   return 0;
 }
-
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 8cf03c268c..badc70ba22 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -1803,7 +1803,7 @@ LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg);
  * Set the alignment for a function parameter.
  *
  * @see llvm::Argument::addAttr()
- * @see llvm::Attributes::constructAlignmentFromInt()
+ * @see llvm::Attributes::Builder::addAlignmentAttr()
  */
 void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align);
 
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index 92228701e3..57abfa0207 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -172,10 +172,20 @@ enum LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef);
     See the method llvm::DataLayout::getPointerSize. */
 unsigned LLVMPointerSize(LLVMTargetDataRef);
 
+/** Returns the pointer size in bytes for a target for a specified
+    address space.
+    See the method llvm::DataLayout::getPointerSize. */
+unsigned LLVMPointerSizeForAS(LLVMTargetDataRef, unsigned AS);
+
 /** Returns the integer type that is the same size as a pointer on a target.
     See the method llvm::DataLayout::getIntPtrType. */
 LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef);
 
+/** Returns the integer type that is the same size as a pointer on a target.
+    This version allows the address space to be specified.
+    See the method llvm::DataLayout::getIntPtrType. */
+LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef, unsigned AS);
+
 /** Computes the size of a type in bytes for a target.
     See the method llvm::DataLayout::getTypeSizeInBits. */
 unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef, LLVMTypeRef);
diff --git a/include/llvm/ADT/ImmutableSet.h b/include/llvm/ADT/ImmutableSet.h
index 261d0494e2..3900f96be1 100644
--- a/include/llvm/ADT/ImmutableSet.h
+++ b/include/llvm/ADT/ImmutableSet.h
@@ -89,7 +89,7 @@ public:
   ImutAVLTree* getMaxElement() {
     ImutAVLTree *T = this;
     ImutAVLTree *Right = T->getRight();
-    while (Right) { T = right; right = T->getRight(); }
+    while (Right) { T = Right; Right = T->getRight(); }
     return T;
   }
 
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index a0527683f6..15fe55fbe3 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -76,7 +76,8 @@ public:
     SCEI,
     BGP,
     BGQ,
-    Freescale
+    Freescale,
+    IBM
   };
   enum OSType {
     UnknownOS,
@@ -101,7 +102,8 @@ public:
     RTEMS,
     NativeClient,
     CNK,         // BG/P Compute-Node Kernel
-    Bitrig
+    Bitrig,
+    AIX
   };
   enum EnvironmentType {
     UnknownEnvironment,
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index e0b6a507bc..03ee520440 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -15,7 +15,6 @@
 #ifndef LLVM_ATTRIBUTES_H
 #define LLVM_ATTRIBUTES_H
 
-#include "llvm/AttributesImpl.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/ADT/ArrayRef.h"
 #include <cassert>
@@ -89,21 +88,35 @@ public:
     ZExt            = 27   ///< Zero extended before/after call
   };
 private:
-  AttributesImpl Attrs;
-
-  explicit Attributes(AttributesImpl *A);
+  AttributesImpl *Attrs;
+  Attributes(AttributesImpl *A);
 public:
   Attributes() : Attrs(0) {}
-  explicit Attributes(uint64_t Val);
-  explicit Attributes(LLVMContext &C, AttrVal Val);
   Attributes(const Attributes &A);
+  Attributes &operator=(const Attributes &A) {
+    Attrs = A.Attrs;
+    return *this;
+  }
 
+  /// get - Return a uniquified Attributes object. This takes the uniquified
+  /// value from the Builder and wraps it in the Attributes class.
+  class Builder;
+  static Attributes get(LLVMContext &Context, ArrayRef<AttrVal> Vals);
+  static Attributes get(LLVMContext &Context, Builder &B);
+
+  //===--------------------------------------------------------------------===//
+  /// Attributes::Builder - This class is used in conjunction with the
+  /// Attributes::get method to create an Attributes object. The object itself
+  /// is uniquified. The Builder's value, however, is not. So this can be used
+  /// as a quick way to test for equality, presence of attributes, etc.
   class Builder {
     friend class Attributes;
     uint64_t Bits;
   public:
     Builder() : Bits(0) {}
+    explicit Builder(uint64_t B) : Bits(B) {}
     Builder(const Attributes &A) : Bits(A.Raw()) {}
+    Builder(const Builder &B) : Bits(B.Bits) {}
 
     void clear() { Bits = 0; }
 
@@ -118,10 +131,20 @@ public:
     Builder &addAttribute(Attributes::AttrVal Val);
     Builder &removeAttribute(Attributes::AttrVal Val);
 
-    void addAlignmentAttr(unsigned Align);
-    void addStackAlignmentAttr(unsigned Align);
+    Builder &addAttributes(const Attributes &A);
+    Builder &removeAttributes(const Attributes &A);
 
-    void removeAttributes(const Attributes &A);
+    /// addRawValue - Add the raw value to the internal representation. This
+    /// should be used ONLY for decoding bitcode!
+    Builder &addRawValue(uint64_t Val);
+
+    /// addAlignmentAttr - This turns an int alignment (which must be a power of
+    /// 2) into the form used internally in Attributes.
+    Builder &addAlignmentAttr(unsigned Align);
+
+    /// addStackAlignmentAttr - This turns an int stack alignment (which must be
+    /// a power of 2) into the form used internally in Attributes.
+    Builder &addStackAlignmentAttr(unsigned Align);
 
     /// @brief Remove attributes that are used on functions only.
     void removeFunctionOnlyAttrs() {
@@ -144,20 +167,20 @@ public:
         .removeAttribute(Attributes::ReturnsTwice)
         .removeAttribute(Attributes::AddressSafety);
     }
-  };
 
-  /// get - Return a uniquified Attributes object. This takes the uniquified
-  /// value from the Builder and wraps it in the Attributes class.
-  static Attributes get(Builder &B);
-  static Attributes get(LLVMContext &Context, Builder &B);
+    bool operator==(const Builder &B) {
+      return Bits == B.Bits;
+    }
+    bool operator!=(const Builder &B) {
+      return Bits != B.Bits;
+    }
+  };
 
   /// @brief Return true if the attribute is present.
   bool hasAttribute(AttrVal Val) const;
 
   /// @brief Return true if attributes exist
-  bool hasAttributes() const {
-    return Attrs.hasAttributes();
-  }
+  bool hasAttributes() const;
 
   /// @brief Return true if the attributes are a non-null intersection.
   bool hasAttributes(const Attributes &A) const;
@@ -206,50 +229,15 @@ public:
       hasAttribute(Attributes::AddressSafety);
   }
 
-  bool isEmptyOrSingleton() const;
-
-  // This is a "safe bool() operator".
-  operator const void *() const { return Attrs.Bits ? this : 0; }
-  bool operator == (const Attributes &A) const {
-    return Attrs.Bits == A.Attrs.Bits;
+  bool operator==(const Attributes &A) const {
+    return Attrs == A.Attrs;
   }
-  bool operator != (const Attributes &A) const {
-    return Attrs.Bits != A.Attrs.Bits;
+  bool operator!=(const Attributes &A) const {
+    return Attrs != A.Attrs;
   }
 
-  Attributes operator | (const Attributes &A) const;
-  Attributes operator & (const Attributes &A) const;
-  Attributes operator ^ (const Attributes &A) const;
-  Attributes &operator |= (const Attributes &A);
-  Attributes &operator &= (const Attributes &A);
-  Attributes operator ~ () const;
-
   uint64_t Raw() const;
 
-  /// constructAlignmentFromInt - This turns an int alignment (a power of 2,
-  /// normally) into the form used internally in Attributes.
-  static Attributes constructAlignmentFromInt(unsigned i) {
-    // Default alignment, allow the target to define how to align it.
-    if (i == 0)
-      return Attributes();
-
-    assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
-    assert(i <= 0x40000000 && "Alignment too large.");
-    return Attributes((Log2_32(i)+1) << 16);
-  }
-
-  /// constructStackAlignmentFromInt - This turns an int stack alignment (which
-  /// must be a power of 2) into the form used internally in Attributes.
-  static Attributes constructStackAlignmentFromInt(unsigned i) {
-    // Default alignment, allow the target to define how to align it.
-    if (i == 0)
-      return Attributes();
-
-    assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
-    assert(i <= 0x100 && "Alignment too large.");
-    return Attributes((Log2_32(i)+1) << 26);
-  }
-
   /// @brief Which attributes cannot be applied to a type.
   static Attributes typeIncompatible(Type *Ty);
 
@@ -278,18 +266,19 @@ public:
   /// containing the LLVM attributes that have been decoded from the given
   /// integer.  This function must stay in sync with
   /// 'encodeLLVMAttributesForBitcode'.
-  static Attributes decodeLLVMAttributesForBitcode(uint64_t EncodedAttrs) {
+  static Attributes decodeLLVMAttributesForBitcode(LLVMContext &C,
+                                                   uint64_t EncodedAttrs) {
     // The alignment is stored as a 16-bit raw value from bits 31--16.  We shift
     // the bits above 31 down by 11 bits.
     unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16;
     assert((!Alignment || isPowerOf2_32(Alignment)) &&
            "Alignment must be a power of two.");
 
-    Attributes Attrs(EncodedAttrs & 0xffff);
+    Attributes::Builder B(EncodedAttrs & 0xffff);
     if (Alignment)
-      Attrs |= Attributes::constructAlignmentFromInt(Alignment);
-    Attrs |= Attributes((EncodedAttrs & (0xfffULL << 32)) >> 11);
-    return Attrs;
+      B.addAlignmentAttr(Alignment);
+    B.addRawValue((EncodedAttrs & (0xfffULL << 32)) >> 11);
+    return Attributes::get(C, B);
   }
 
   /// getAsString - The set of Attributes set in Attributes is converted to a
@@ -311,7 +300,7 @@ struct AttributeWithIndex {
                      ///< Index 0 is used for return value attributes.
                      ///< Index ~0U is used for function attributes.
 
-  static AttributeWithIndex get(unsigned Idx,
+  static AttributeWithIndex get(LLVMContext &C, unsigned Idx,
                                 ArrayRef<Attributes::AttrVal> Attrs) {
     Attributes::Builder B;
 
@@ -321,7 +310,7 @@ struct AttributeWithIndex {
 
     AttributeWithIndex P;
     P.Index = Idx;
-    P.Attrs = Attributes::get(B);
+    P.Attrs = Attributes::get(C, B);
     return P;
   }
   static AttributeWithIndex get(unsigned Idx, Attributes Attrs) {
@@ -341,6 +330,12 @@ class AttributeListImpl;
 /// AttrListPtr - This class manages the ref count for the opaque
 /// AttributeListImpl object and provides accessors for it.
 class AttrListPtr {
+public:
+  enum AttrIndex {
+    ReturnIndex = 0U,
+    FunctionIndex = ~0U
+  };
+private:
   /// AttrList - The attributes that we are managing.  This can be null
   /// to represent the empty attributes list.
   AttributeListImpl *AttrList;
@@ -360,12 +355,12 @@ public:
   /// addAttr - Add the specified attribute at the specified index to this
   /// attribute list.  Since attribute lists are immutable, this
   /// returns the new list.
-  AttrListPtr addAttr(unsigned Idx, Attributes Attrs) const;
+  AttrListPtr addAttr(LLVMContext &C, unsigned Idx, Attributes Attrs) const;
 
   /// removeAttr - Remove the specified attribute at the specified index from
   /// this attribute list.  Since attribute lists are immutable, this
   /// returns the new list.
-  AttrListPtr removeAttr(unsigned Idx, Attributes Attrs) const;
+  AttrListPtr removeAttr(LLVMContext &C, unsigned Idx, Attributes Attrs) const;
 
   //===--------------------------------------------------------------------===//
   // Attribute List Accessors
@@ -379,12 +374,12 @@ public:
   /// getRetAttributes - The attributes for the ret value are
   /// returned.
   Attributes getRetAttributes() const {
-    return getAttributes(0);
+    return getAttributes(ReturnIndex);
   }
 
   /// getFnAttributes - The function attributes are returned.
   Attributes getFnAttributes() const {
-    return getAttributes(~0U);
+    return getAttributes(FunctionIndex);
   }
 
   /// paramHasAttr - Return true if the specified parameter index has the
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 93990e164d..2b96c7abe4 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -110,6 +110,10 @@ public:
   /// Initialize the strategy after building the DAG for a new region.
   virtual void initialize(ScheduleDAGMI *DAG) = 0;
 
+  /// Notify this strategy that all roots have been released (including those
+  /// that depend on EntrySU or ExitSU).
+  virtual void registerRoots() {}
+
   /// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
   /// schedule the node at the top of the unscheduled region. Otherwise it will
   /// be scheduled at the bottom.
diff --git a/include/llvm/CodeGen/ScheduleDAGILP.h b/include/llvm/CodeGen/ScheduleDAGILP.h
new file mode 100644
index 0000000000..1aa4058421
--- /dev/null
+++ b/include/llvm/CodeGen/ScheduleDAGILP.h
@@ -0,0 +1,86 @@
+//===- ScheduleDAGILP.h - ILP metric for ScheduleDAGInstrs ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definition of an ILP metric for machine level instruction scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SCHEDULEDAGILP_H
+#define LLVM_CODEGEN_SCHEDULEDAGILP_H
+
+#include "llvm/Support/DataTypes.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+class ScheduleDAGInstrs;
+class SUnit;
+
+/// \brief Represent the ILP of the subDAG rooted at a DAG node.
+struct ILPValue {
+  unsigned InstrCount;
+  unsigned Cycles;
+
+  ILPValue(): InstrCount(0), Cycles(0) {}
+
+  ILPValue(unsigned count, unsigned cycles):
+    InstrCount(count), Cycles(cycles) {}
+
+  bool isValid() const { return Cycles > 0; }
+
+  // Order by the ILP metric's value.
+  bool operator<(ILPValue RHS) const {
+    return (uint64_t)InstrCount * RHS.Cycles
+      < (uint64_t)Cycles * RHS.InstrCount;
+  }
+  bool operator>(ILPValue RHS) const {
+    return RHS < *this;
+  }
+  bool operator<=(ILPValue RHS) const {
+    return (uint64_t)InstrCount * RHS.Cycles
+      <= (uint64_t)Cycles * RHS.InstrCount;
+  }
+  bool operator>=(ILPValue RHS) const {
+    return RHS <= *this;
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  void print(raw_ostream &OS) const;
+
+  void dump() const;
+#endif
+};
+
+/// \brief Compute the values of each DAG node for an ILP metric.
+///
+/// This metric assumes that the DAG is a forest of trees with roots at the
+/// bottom of the schedule.
+class ScheduleDAGILP {
+  bool IsBottomUp;
+  std::vector<ILPValue> ILPValues;
+
+public:
+  ScheduleDAGILP(bool IsBU): IsBottomUp(IsBU) {}
+
+  /// \brief Initialize the result data with the size of the DAG.
+  void resize(unsigned NumSUnits);
+
+  /// \brief Compute the ILP metric for the subDAG at this root.
+  void computeILP(const SUnit *Root);
+
+  /// \brief Get the ILP value for a DAG node.
+  ILPValue getILP(const SUnit *SU);
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val);
+
+} // namespace llvm
+
+#endif
diff --git a/include/llvm/DataLayout.h b/include/llvm/DataLayout.h
index a24737e842..c9ac0b7fea 100644
--- a/include/llvm/DataLayout.h
+++ b/include/llvm/DataLayout.h
@@ -231,9 +231,7 @@ public:
   }
 
   /// Layout pointer alignment
-  /// FIXME: The defaults need to be removed once all of
-  /// the backends/clients are updated.
-  unsigned getPointerABIAlignment(unsigned AS = 0)  const {
+  unsigned getPointerABIAlignment(unsigned AS)  const {
     DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
     if (val == Pointers.end()) {
       val = Pointers.find(0);
@@ -241,9 +239,7 @@ public:
     return val->second.ABIAlign;
   }
   /// Return target's alignment for stack-based pointers
-  /// FIXME: The defaults need to be removed once all of
-  /// the backends/clients are updated.
-  unsigned getPointerPrefAlignment(unsigned AS = 0) const {
+  unsigned getPointerPrefAlignment(unsigned AS) const {
     DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
     if (val == Pointers.end()) {
       val = Pointers.find(0);
@@ -251,9 +247,7 @@ public:
     return val->second.PrefAlign;
   }
   /// Layout pointer size
-  /// FIXME: The defaults need to be removed once all of
-  /// the backends/clients are updated.
-  unsigned getPointerSize(unsigned AS = 0)          const {
+  unsigned getPointerSize(unsigned AS)          const {
     DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
     if (val == Pointers.end()) {
       val = Pointers.find(0);
@@ -261,9 +255,7 @@ public:
     return val->second.TypeBitWidth;
   }
   /// Layout pointer size, in bits
-  /// FIXME: The defaults need to be removed once all of
-  /// the backends/clients are updated.
-  unsigned getPointerSizeInBits(unsigned AS = 0)    const {
+  unsigned getPointerSizeInBits(unsigned AS)    const {
     DenseMap<unsigned, PointerAlignElem>::const_iterator val = Pointers.find(AS);
     if (val == Pointers.end()) {
       val = Pointers.find(0);
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
index f28aa5cdd7..f36d5650b3 100644
--- a/include/llvm/Function.h
+++ b/include/llvm/Function.h
@@ -180,7 +180,7 @@ public:
     // Function Attributes are stored at ~0 index 
     Attributes::Builder B;
     B.addAttribute(N);
-    addAttribute(~0U, Attributes::get(B));
+    addAttribute(~0U, Attributes::get(getContext(), B));
   }
 
   /// removeFnAttr - Remove function attributes from this function.
@@ -280,7 +280,7 @@ public:
   void setDoesNotAlias(unsigned n) {
     Attributes::Builder B;
     B.addAttribute(Attributes::NoAlias);
-    addAttribute(n, Attributes::get(B));
+    addAttribute(n, Attributes::get(getContext(), B));
   }
 
   /// @brief Determine if the parameter can be captured.
@@ -291,7 +291,7 @@ public:
   void setDoesNotCapture(unsigned n) {
     Attributes::Builder B;
     B.addAttribute(Attributes::NoCapture);
-    addAttribute(n, Attributes::get(B));
+    addAttribute(n, Attributes::get(getContext(), B));
   }
 
   /// copyAttributesFrom - copy all additional attributes (those not needed to
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index e43b476fab..458bd6ddb6 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -348,7 +348,16 @@ public:
   static unsigned getPointerOperandIndex() { return 1U; }
 
   unsigned getPointerAddressSpace() const {
-    return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace();
+    if (getPointerOperand()->getType()->isPointerTy())
+      return cast<PointerType>(getPointerOperand()->getType())
+        ->getAddressSpace();
+    if (getPointerOperand()->getType()->isVectorTy()
+        && cast<VectorType>(getPointerOperand()->getType())->isPointerTy())
+      return cast<PointerType>(cast<VectorType>(
+            getPointerOperand()->getType())->getElementType())
+        ->getAddressSpace();
+    llvm_unreachable("Only a vector of pointers or pointers can be used!");
+    return 0;
   }
 
   // Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -1274,7 +1283,7 @@ public:
   void setIsNoInline() {
     Attributes::Builder B;
     B.addAttribute(Attributes::NoInline);
-    addAttribute(~0, Attributes::get(B));
+    addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), B));
   }
 
   /// @brief Return true if the call can return twice
@@ -1284,7 +1293,7 @@ public:
   void setCanReturnTwice() {
     Attributes::Builder B;
     B.addAttribute(Attributes::ReturnsTwice);
-    addAttribute(~0U, Attributes::get(B));
+    addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), B));
   }
 
   /// @brief Determine if the call does not access memory.
@@ -1294,7 +1303,7 @@ public:
   void setDoesNotAccessMemory() {
     Attributes::Builder B;
     B.addAttribute(Attributes::ReadNone);
-    addAttribute(~0U, Attributes::get(B));
+    addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), B));
   }
 
   /// @brief Determine if the call does not access or only reads memory.
@@ -1304,7 +1313,7 @@ public:
   void setOnlyReadsMemory() {
     Attributes::Builder B;
     B.addAttribute(Attributes::ReadOnly);
-    addAttribute(~0, Attributes::get(B));
+    addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), B));
   }
 
   /// @brief Determine if the call cannot return.
@@ -1312,7 +1321,7 @@ public:
   void setDoesNotReturn() {
     Attributes::Builder B;
     B.addAttribute(Attributes::NoReturn);
-    addAttribute(~0, Attributes::get(B));
+    addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), B));
   }
 
   /// @brief Determine if the call cannot unwind.
@@ -1320,7 +1329,7 @@ public:
   void setDoesNotThrow() {
     Attributes::Builder B;
     B.addAttribute(Attributes::NoUnwind);
-    addAttribute(~0, Attributes::get(B));
+    addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), B));
   }
 
   /// @brief Determine if the call returns a structure through first
@@ -3029,7 +3038,7 @@ public:
   void setIsNoInline() {
     Attributes::Builder B;
     B.addAttribute(Attributes::NoInline);
-    addAttribute(~0, Attributes::get(B));
+    addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), B));
   }
 
   /// @brief Determine if the call does not access memory.
@@ -3039,7 +3048,7 @@ public:
   void setDoesNotAccessMemory() {
     Attributes::Builder B;
     B.addAttribute(Attributes::ReadNone);
-    addAttribute(~0, Attributes::get(B));
+    addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), B));
   }
 
   /// @brief Determine if the call does not access or only reads memory.
@@ -3049,7 +3058,7 @@ public:
   void setOnlyReadsMemory() {
     Attributes::Builder B;
     B.addAttribute(Attributes::ReadOnly);
-    addAttribute(~0, Attributes::get(B));
+    addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), B));
   }
 
   /// @brief Determine if the call cannot return.
@@ -3057,7 +3066,7 @@ public:
   void setDoesNotReturn() {
     Attributes::Builder B;
     B.addAttribute(Attributes::NoReturn);
-    addAttribute(~0, Attributes::get(B));
+    addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), B));
   }
 
   /// @brief Determine if the call cannot unwind.
@@ -3065,7 +3074,7 @@ public:
   void setDoesNotThrow() {
     Attributes::Builder B;
     B.addAttribute(Attributes::NoUnwind);
-    addAttribute(~0, Attributes::get(B));
+    addAttribute(AttrListPtr::FunctionIndex, Attributes::get(getContext(), B));
   }
 
   /// @brief Determine if the call returns a structure through first
@@ -3618,7 +3627,15 @@ public:
 
   /// @brief return the address space of the pointer.
   unsigned getAddressSpace() const {
-    return cast<PointerType>(getType())->getAddressSpace();
+    if (getType()->isPointerTy()) 
+      return cast<PointerType>(getType())->getAddressSpace();
+    if (getType()->isVectorTy() &&
+        cast<VectorType>(getType())->getElementType()->isPointerTy())
+      return cast<PointerType>(
+          cast<VectorType>(getType())->getElementType())
+        ->getAddressSpace();
+    llvm_unreachable("Must be a pointer or a vector of pointers.");
+    return 0;
   }
 
   // Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -3659,7 +3676,16 @@ public:
 
   /// @brief return the address space of the pointer.
   unsigned getPointerAddressSpace() const {
-    return cast<PointerType>(getOperand(0)->getType())->getAddressSpace();
+    Type *Ty = getOperand(0)->getType();
+    if (Ty->isPointerTy())
+      return cast<PointerType>(Ty)->getAddressSpace();
+    if (Ty->isVectorTy()
+        && cast<VectorType>(Ty)->getElementType()->isPointerTy())
+      return cast<PointerType>(
+          cast<VectorType>(Ty)->getElementType())
+        ->getAddressSpace();
+    llvm_unreachable("Must be a pointer or a vector of pointers.");
+    return 0;
   }
 
   // Methods for support type inquiry through isa, cast, and dyn_cast:
diff --git a/include/llvm/Intrinsics.h b/include/llvm/Intrinsics.h
index c3503889e7..3108a8e525 100644
--- a/include/llvm/Intrinsics.h
+++ b/include/llvm/Intrinsics.h
@@ -50,7 +50,7 @@ namespace Intrinsic {
   /// Intrinsic::getType(ID) - Return the function type for an intrinsic.
   ///
   FunctionType *getType(LLVMContext &Context, ID id,
-                              ArrayRef<Type*> Tys = ArrayRef<Type*>());
+                        ArrayRef<Type*> Tys = ArrayRef<Type*>());
 
   /// Intrinsic::isOverloaded(ID) - Returns true if the intrinsic can be
   /// overloaded.
@@ -58,7 +58,7 @@ namespace Intrinsic {
 
   /// Intrinsic::getAttributes(ID) - Return the attributes for an intrinsic.
   ///
-  AttrListPtr getAttributes(ID id);
+  AttrListPtr getAttributes(LLVMContext &C, ID id);
 
   /// Intrinsic::getDeclaration(M, ID) - Create or insert an LLVM Function
   /// declaration for an intrinsic, and return it.
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index adc960d27e..08758cda22 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -20,6 +20,7 @@ class MCAsmLexer;
 class MCAsmParserExtension;
 class MCContext;
 class MCExpr;
+class MCParsedAsmOperand;
 class MCStreamer;
 class MCTargetAsmParser;
 class SMLoc;
@@ -73,6 +74,27 @@ public:
   /// Run - Run the parser on the input source buffer.
   virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false) = 0;
 
+  virtual void setParsingInlineAsm(bool V) = 0;
+
+  /// ParseStatement - Parse the next statement.
+  virtual bool ParseStatement() = 0;
+
+  /// getNumParsedOperands - Returns the number of MCAsmParsedOperands from the
+  /// previously parsed statement.
+  virtual unsigned getNumParsedOperands() = 0;
+
+  /// getParsedOperand - Get a MCAsmParsedOperand.
+  virtual MCParsedAsmOperand &getParsedOperand(unsigned OpNum) = 0;
+
+  /// freeParsedOperands - Free the MCAsmParsedOperands.
+  virtual void freeParsedOperands() = 0;
+
+  /// isInstruction - Was the previously parsed statement an instruction?
+  virtual bool isInstruction() = 0;
+
+  /// getOpcode - Get the opcode from the previously parsed instruction.
+  virtual unsigned getOpcode() = 0;
+
   /// Warning - Emit a warning at the location \p L, with the message \p Msg.
   ///
   /// \return The return value is true, if warnings are fatal.
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 0ce32d617e..280145bfbc 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -19,10 +19,34 @@ class raw_ostream;
 /// base class is used by target-independent clients and is the interface
 /// between parsing an asm instruction and recognizing it.
 class MCParsedAsmOperand {
+  /// MCOperandNum - The corresponding MCInst operand number.  Only valid when
+  /// parsing MS-style inline assembly.
+  unsigned MCOperandNum;
+
+  /// Constraint - The constraint on this operand.  Only valid when parsing
+  /// MS-style inline assembly.
+  std::string Constraint;
+
 public:
   MCParsedAsmOperand() {}
   virtual ~MCParsedAsmOperand() {}
 
+  void setConstraint(StringRef C) { Constraint = C.str(); }
+  StringRef getConstraint() { return Constraint; }
+
+  void setMCOperandNum (unsigned OpNum) { MCOperandNum = OpNum; }
+  unsigned getMCOperandNum() { return MCOperandNum; }
+
+  unsigned getNameLen() {
+    assert (getStartLoc().isValid() && "Invalid StartLoc!");
+    assert (getEndLoc().isValid() && "Invalid EndLoc!");
+    return getEndLoc().getPointer() - getStartLoc().getPointer();
+  }
+
+  StringRef getName() {
+    return StringRef(getStartLoc().getPointer(), getNameLen());
+  }
+
   /// isToken - Is this a token operand?
   virtual bool isToken() const = 0;
   /// isImm - Is this an immediate operand?
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index ad30b9ca60..40f83bf5d5 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -575,6 +575,11 @@ namespace llvm {
     virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
                              bool isVector);
 
+    /// PPC-related methods.
+    /// FIXME: Eventually replace it with some "target MC streamer" and move
+    /// these methods there.
+    virtual void EmitTCEntry(const MCSymbol &S);
+
     /// FinishImpl - Streamer specific finalization.
     virtual void FinishImpl() = 0;
     /// Finish - Finish emission of machine code.
diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h
index a966a6b8b3..c9ea5ae484 100644
--- a/include/llvm/MC/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCTargetAsmParser.h
@@ -50,12 +50,6 @@ public:
   virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                              SMLoc &EndLoc) = 0;
 
-  /// MapAndConstraints - Map inline assembly operands to MCInst operands
-  /// and an associated constraint.
-  typedef std::pair< unsigned, std::string > MapAndConstraint;
-  typedef SmallVector<MapAndConstraint, 4> MatchInstMapAndConstraints;
-  typedef SmallVectorImpl<MapAndConstraint> MatchInstMapAndConstraintsImpl;
-
   /// ParseInstruction - Parse one assembly instruction.
   ///
   /// The parser is positioned following the instruction name. The target
@@ -88,22 +82,6 @@ public:
   /// otherwise.
   virtual bool mnemonicIsValid(StringRef Mnemonic) = 0;
 
-  /// MatchInstruction - Recognize a series of operands of a parsed instruction
-  /// as an actual MCInst.  This returns false on success and returns true on
-  /// failure to match.
-  ///
-  /// On failure, the target parser is responsible for emitting a diagnostic
-  /// explaining the match failure.
-  virtual bool
-  MatchInstruction(SMLoc IDLoc, 
-                   SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                   MCStreamer &Out, unsigned &Kind, unsigned &Opcode,
-                   MatchInstMapAndConstraintsImpl &MapAndConstraints,
-                   unsigned &OrigErrorInfo, bool matchingInlineAsm = false) {
-    OrigErrorInfo = ~0x0;
-    return true;
-  }
-
   /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
   /// instruction as an actual MCInst and emit it to the specified MCStreamer.
   /// This returns false on success and returns true on failure to match.
@@ -111,9 +89,10 @@ public:
   /// On failure, the target parser is responsible for emitting a diagnostic
   /// explaining the match failure.
   virtual bool
-  MatchAndEmitInstruction(SMLoc IDLoc,
+  MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                           SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                          MCStreamer &Out) = 0;
+                          MCStreamer &Out, unsigned &ErrorInfo,
+                          bool MatchingInlineAsm) = 0;
 
   /// checkTargetMatchPredicate - Validate the instruction match against
   /// any complex target predicates not expressible via match classes.
@@ -122,8 +101,7 @@ public:
   }
 
   virtual void convertToMapAndConstraints(unsigned Kind,
-                           const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                         MatchInstMapAndConstraintsImpl &MapAndConstraints) = 0;
+                      const SmallVectorImpl<MCParsedAsmOperand*> &Operands) = 0;
 };
 
 } // End llvm namespace
diff --git a/include/llvm/Support/Memory.h b/include/llvm/Support/Memory.h
index 8227c84bff..025eee7f9f 100644
--- a/include/llvm/Support/Memory.h
+++ b/include/llvm/Support/Memory.h
@@ -98,8 +98,8 @@ namespace sys {
     /// \p ErrMsg [out] returns a string describing any error that occured.
     ///
     /// If \p Flags is MF_WRITE, the actual behavior varies
-    /// with the operating system (i.e. MF_READWRITE on Windows) and the
-    /// target architecture (i.e. MF_WRITE -> MF_READWRITE on i386).
+    /// with the operating system (i.e. MF_READ | MF_WRITE on Windows) and the
+    /// target architecture (i.e. MF_WRITE -> MF_READ | MF_WRITE on i386).
     ///
     /// \r error_success if the function was successful, or an error_code
     /// describing the failure if an error occurred.
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index bf81d1a68f..1e5b192d46 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -158,7 +158,7 @@ public:
   // Return the pointer type for the given address space, defaults to
   // the pointer type from the data layout.
   // FIXME: The default needs to be removed once all the code is updated.
-  virtual MVT getPointerTy(uint32_t addrspace = 0) const { return PointerTy; }
+  virtual MVT getPointerTy(uint32_t AS = 0) const { return PointerTy; }
   virtual MVT getShiftAmountTy(EVT LHSTy) const;
 
   /// isSelectExpensive - Return true if the select operation is expensive for
diff --git a/include/llvm/Target/TargetTransformImpl.h b/include/llvm/Target/TargetTransformImpl.h
index e240cdf64a..7648f4f935 100644
--- a/include/llvm/Target/TargetTransformImpl.h
+++ b/include/llvm/Target/TargetTransformImpl.h
@@ -30,7 +30,7 @@ private:
 
 public:
   /// Ctor
-  explicit ScalarTargetTransformImpl(const TargetLowering *TL): TLI(TL) {}
+  explicit ScalarTargetTransformImpl(const TargetLowering *TL) : TLI(TL) {}
 
   virtual bool isLegalAddImmediate(int64_t imm) const;
 
diff --git a/include/llvm/TargetTransformInfo.h b/include/llvm/TargetTransformInfo.h
index e1be3ba5a1..82fc14dbd7 100644
--- a/include/llvm/TargetTransformInfo.h
+++ b/include/llvm/TargetTransformInfo.h
@@ -75,6 +75,8 @@ public:
 /// LSR, and LowerInvoke use this interface.
 class ScalarTargetTransformInfo {
 public:
+  virtual ~ScalarTargetTransformInfo() {}
+
   /// isLegalAddImmediate - Return true if the specified immediate is legal
   /// add immediate, that is the target has add instructions which can add
   /// a register with the immediate without having to materialize the
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 4b0c448acf..8e63aaa4e8 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -34,7 +34,7 @@ ModulePass *createGCOVProfilerPass(bool EmitNotes = true, bool EmitData = true,
                                    bool UseExtraChecksum = false);
 
 // Insert AddressSanitizer (address sanity checking) instrumentation
-ModulePass *createAddressSanitizerPass();
+FunctionPass *createAddressSanitizerPass();
 // Insert ThreadSanitizer (race detection) instrumentation
 FunctionPass *createThreadSanitizerPass();
 
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 21dd3fbe11..fd1b5556ef 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -186,7 +186,8 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &TD, User *GEP,
   bool isInBounds = cast<GEPOperator>(GEP)->isInBounds() && !NoAssumptions;
 
   // Build a mask for high order bits.
-  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  unsigned AS = cast<GEPOperator>(GEP)->getPointerAddressSpace();
+  unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
   uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
 
   for (User::op_iterator i = GEP->op_begin() + 1, e = GEP->op_end(); i != e;
diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
new file mode 100644
index 0000000000..5db2d00181
--- /dev/null
+++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -0,0 +1,43 @@
+//===- SimplifyLibCalls.h - Library call simplifier -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes an interface to build some C language libcalls for
+// optimization passes that need to call the various functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SIMPLIFYLIBCALLS_H
+#define LLVM_TRANSFORMS_UTILS_SIMPLIFYLIBCALLS_H
+
+namespace llvm {
+  class Value;
+  class CallInst;
+  class DataLayout;
+  class TargetLibraryInfo;
+  class LibCallSimplifierImpl;
+
+  /// LibCallSimplifier - This class implements a collection of optimizations
+  /// that replace well formed calls to library functions with a more optimal
+  /// form.  For example, replacing 'printf("Hello!")' with 'puts("Hello!")'.
+  class LibCallSimplifier {
+    /// Impl - A pointer to the actual implementation of the library call
+    /// simplifier.
+    LibCallSimplifierImpl *Impl;
+  public:
+    LibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI);
+    virtual ~LibCallSimplifier();
+
+    /// optimizeCall - Take the given call instruction and return a more
+    /// optimal value to replace the instruction with or 0 if a more
+    /// optimal form can't be found.
+    Value *optimizeCall(CallInst *CI);
+  };
+} // End llvm namespace
+
+#endif
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 263bfc031f..36903f94e2 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -286,7 +286,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       V = GEPOp->getOperand(0);
       continue;
     }
-    
+
+    unsigned AS = GEPOp->getPointerAddressSpace();
     // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
     gep_type_iterator GTI = gep_type_begin(GEPOp);
     for (User::const_op_iterator I = GEPOp->op_begin()+1,
@@ -315,7 +316,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       // If the integer type is smaller than the pointer size, it is implicitly
       // sign extended to pointer size.
       unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
-      if (TD->getPointerSizeInBits() > Width)
+      if (TD->getPointerSizeInBits(AS) > Width)
         Extension = EK_SignExt;
       
       // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
@@ -344,7 +345,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       
       // Make sure that we have a scale that makes sense for this target's
       // pointer size.
-      if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+      if (unsigned ShiftBits = 64-TD->getPointerSizeInBits(AS)) {
         Scale <<= ShiftBits;
         Scale = (int64_t)Scale >> ShiftBits;
       }
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 651a54be1b..d669268496 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -91,14 +91,16 @@ bool llvm::isInstructionFree(const Instruction *I, const DataLayout *TD) {
     // which doesn't contain values outside the range of a pointer.
     if (isa<IntToPtrInst>(CI) && TD &&
         TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
-        Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits())
+        Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits(
+          cast<IntToPtrInst>(CI)->getAddressSpace()))
       return true;
 
     // A ptrtoint cast is free so long as the result is large enough to store
     // the pointer, and a legal integer type.
     if (isa<PtrToIntInst>(CI) && TD &&
         TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) &&
-        Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits())
+        Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits(
+          cast<PtrToIntInst>(CI)->getPointerAddressSpace()))
       return true;
 
     // trunc to a native type is free (assuming the target has compare and
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index b7bf044a36..146897ad67 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -916,10 +916,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
       if (TD && CE->getOpcode() == Instruction::IntToPtr) {
         Constant *Input = CE->getOperand(0);
         unsigned InWidth = Input->getType()->getScalarSizeInBits();
-        if (TD->getPointerSizeInBits() < InWidth) {
+        unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
+        if (TD->getPointerSizeInBits(AS) < InWidth) {
           Constant *Mask = 
             ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
-                                                  TD->getPointerSizeInBits()));
+                                                  TD->getPointerSizeInBits(AS)));
           Input = ConstantExpr::getAnd(Input, Mask);
         }
         // Do a zext or trunc to get to the dest size.
@@ -932,9 +933,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
     // the int size is >= the ptr size.  This requires knowing the width of a
     // pointer, so it can't be done in ConstantExpr::getCast.
     if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
-      if (TD &&
-          TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
-          CE->getOpcode() == Instruction::PtrToInt)
+      if (TD && CE->getOpcode() == Instruction::PtrToInt &&
+          TD->getPointerSizeInBits(
+            cast<PointerType>(CE->getOperand(0)->getType())->getAddressSpace())
+          <= CE->getType()->getScalarSizeInBits())
         return FoldBitCast(CE->getOperand(0), DestTy, *TD);
 
     return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index c7bec4323c..016fe396e7 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -1160,8 +1160,8 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
   DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
   if (Delta->isZero()) {
-    Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
-    Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
+    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
+    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
     ++WeakCrossingSIVsuccesses;
     if (!Result.DV[Level].Direction) {
       ++WeakCrossingSIVindependence;
@@ -1222,8 +1222,8 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
     }
     if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
       // i = i' = UB
-      Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
-      Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
+      Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
+      Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
       ++WeakCrossingSIVsuccesses;
       if (!Result.DV[Level].Direction) {
         ++WeakCrossingSIVindependence;
@@ -1256,7 +1256,7 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
   DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
   if (Remainder != 0) {
     // Equal direction isn't possible
-    Result.DV[Level].Direction &= ~Dependence::DVEntry::EQ;
+    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
     ++WeakCrossingSIVsuccesses;
   }
   return false;
@@ -2380,7 +2380,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
       DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
       if (Remainder != 0) {
         unsigned Level = mapSrcLoop(CurLoop);
-        Result.DV[Level - 1].Direction &= ~Dependence::DVEntry::EQ;
+        Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
         Improved = true;
       }
     }
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 5f51f775f1..95e58022ca 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -243,7 +243,8 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
   if (!TD)
     return false;
 
-  unsigned IntPtrWidth = TD->getPointerSizeInBits();
+  unsigned AS = GEP.getPointerAddressSpace();
+  unsigned IntPtrWidth = TD->getPointerSizeInBits(AS);
   assert(IntPtrWidth == Offset.getBitWidth());
 
   for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
@@ -391,7 +392,8 @@ bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
   // Track base/offset pairs when converted to a plain integer provided the
   // integer is large enough to represent the pointer.
   unsigned IntegerSize = I.getType()->getScalarSizeInBits();
-  if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
+  unsigned AS = I.getPointerAddressSpace();
+  if (TD && IntegerSize >= TD->getPointerSizeInBits(AS)) {
     std::pair<Value *, APInt> BaseAndOffset
       = ConstantOffsetPtrs.lookup(I.getOperand(0));
     if (BaseAndOffset.first)
@@ -425,7 +427,8 @@ bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
   // modifications provided the integer is not too large.
   Value *Op = I.getOperand(0);
   unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
-  if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
+  unsigned AS = I.getAddressSpace();
+  if (TD && IntegerSize <= TD->getPointerSizeInBits(AS)) {
     std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
     if (BaseAndOffset.first)
       ConstantOffsetPtrs[&I] = BaseAndOffset;
@@ -760,7 +763,8 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
   if (!TD || !V->getType()->isPointerTy())
     return 0;
 
-  unsigned IntPtrWidth = TD->getPointerSizeInBits();
+  unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();;
+  unsigned IntPtrWidth = TD->getPointerSizeInBits(AS);
   APInt Offset = APInt::getNullValue(IntPtrWidth);
 
   // Even though we don't look through PHI nodes, we could be called on an
@@ -824,7 +828,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
         // size of the byval type by the target's pointer size.
         PointerType *PTy = cast<PointerType>(CS.getArgument(I)->getType());
         unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType());
-        unsigned PointerSize = TD->getPointerSizeInBits();
+        unsigned AS = PTy->getAddressSpace();
+        unsigned PointerSize = TD->getPointerSizeInBits(AS);
         // Ceiling division.
         unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize;
 
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index b3d62487fc..8e326122fa 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -666,7 +666,8 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
 /// 'Offset' APInt must be the bitwidth of the target's pointer size.
 static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP,
                                 APInt &Offset) {
-  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  unsigned AS = GEP->getPointerAddressSpace();
+  unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
   assert(IntPtrWidth == Offset.getBitWidth());
 
   gep_type_iterator GTI = gep_type_begin(GEP);
@@ -696,12 +697,14 @@ static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP,
 /// accumulates the total constant offset applied in the returned constant. It
 /// returns 0 if V is not a pointer, and returns the constant '0' if there are
 /// no constant offsets applied.
+/// FIXME: This function also exists in InlineCost.cpp.
 static Constant *stripAndComputeConstantOffsets(const DataLayout &TD,
                                                 Value *&V) {
   if (!V->getType()->isPointerTy())
     return 0;
 
-  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();;
+  unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
   APInt Offset = APInt::getNullValue(IntPtrWidth);
 
   // Even though we don't look through PHI nodes, we could be called on an
@@ -1877,7 +1880,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
     // if the integer type is the same size as the pointer type.
     if (MaxRecurse && Q.TD && isa<PtrToIntInst>(LI) &&
-        Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+        Q.TD->getPointerSizeInBits(
+          cast<PtrToIntInst>(LI)->getPointerAddressSpace()) ==
+        DstTy->getPrimitiveSizeInBits()) {
       if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
         // Transfer the cast to the constant.
         if (Value *V = SimplifyICmpInst(Pred, SrcOp,
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 951b442b87..1d7f0692cb 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -40,7 +40,8 @@ static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
   if (unsigned BitWidth = Ty->getScalarSizeInBits())
     return BitWidth;
   assert(isa<PointerType>(Ty) && "Expected a pointer type!");
-  return TD ? TD->getPointerSizeInBits() : 0;
+  return TD ?
+    TD->getPointerSizeInBits(cast<PointerType>(Ty)->getAddressSpace()) : 0;
 }
 
 static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
@@ -1621,7 +1622,8 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
   
   // Re-sign extend from the pointer size if needed to get overflow edge cases
   // right.
-  unsigned PtrSize = TD.getPointerSizeInBits();
+  unsigned AS = GEP->getPointerAddressSpace();
+  unsigned PtrSize = TD.getPointerSizeInBits(AS);
   if (PtrSize < 64)
     Offset = SignExtend64(Offset, PtrSize);
   
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 349dd0dad7..6f67e48f21 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1443,7 +1443,8 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
     // Otherwise, handle normal operands.
     if (ParseOptionalAttrs(ArgAttrs, 0) || ParseValue(ArgTy, V, PFS))
       return true;
-    ArgList.push_back(ParamInfo(ArgLoc, V, Attributes::get(ArgAttrs)));
+    ArgList.push_back(ParamInfo(ArgLoc, V, Attributes::get(V->getContext(),
+                                                           ArgAttrs)));
   }
 
   Lex.Lex();  // Lex the ')'.
@@ -1492,7 +1493,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
     if (!FunctionType::isValidArgumentType(ArgTy))
       return Error(TypeLoc, "invalid type for function argument");
 
-    ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attributes::get(Attrs), Name));
+    ArgList.push_back(ArgInfo(TypeLoc, ArgTy,
+                              Attributes::get(ArgTy->getContext(),
+                                              Attrs), Name));
 
     while (EatIfPresent(lltok::comma)) {
       // Handle ... at end of arg list.
@@ -1518,7 +1521,9 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
       if (!ArgTy->isFirstClassType())
         return Error(TypeLoc, "invalid type for function argument");
 
-      ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attributes::get(Attrs), Name));
+      ArgList.push_back(ArgInfo(TypeLoc, ArgTy,
+                                Attributes::get(ArgTy->getContext(), Attrs),
+                                Name));
     }
   }
 
@@ -1542,7 +1547,7 @@ bool LLParser::ParseFunctionType(Type *&Result) {
   for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
     if (!ArgList[i].Name.empty())
       return Error(ArgList[i].Loc, "argument name invalid in function type");
-    if (ArgList[i].Attrs)
+    if (ArgList[i].Attrs.hasAttributes())
       return Error(ArgList[i].Loc,
                    "argument attributes invalid in function type");
   }
@@ -2766,7 +2771,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   SmallVector<AttributeWithIndex, 8> Attrs;
 
   if (RetAttrs.hasAttributes())
-    Attrs.push_back(AttributeWithIndex::get(0, Attributes::get(RetAttrs)));
+    Attrs.push_back(
+      AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+                              Attributes::get(RetType->getContext(),
+                                              RetAttrs)));
 
   for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
     ParamTypeList.push_back(ArgList[i].Ty);
@@ -2775,7 +2783,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   }
 
   if (FuncAttrs.hasAttributes())
-    Attrs.push_back(AttributeWithIndex::get(~0, Attributes::get(FuncAttrs)));
+    Attrs.push_back(
+      AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+                              Attributes::get(RetType->getContext(),
+                                              FuncAttrs)));
 
   AttrListPtr PAL = AttrListPtr::get(Attrs);
 
@@ -3297,7 +3308,10 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
   // Set up the Attributes for the function.
   SmallVector<AttributeWithIndex, 8> Attrs;
   if (RetAttrs.hasAttributes())
-    Attrs.push_back(AttributeWithIndex::get(0, Attributes::get(RetAttrs)));
+    Attrs.push_back(
+      AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+                              Attributes::get(Callee->getContext(),
+                                              RetAttrs)));
 
   SmallVector<Value*, 8> Args;
 
@@ -3325,7 +3339,10 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
     return Error(CallLoc, "not enough parameters specified for call");
 
   if (FnAttrs.hasAttributes())
-    Attrs.push_back(AttributeWithIndex::get(~0, Attributes::get(FnAttrs)));
+    Attrs.push_back(
+      AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+                              Attributes::get(Callee->getContext(),
+                                              FnAttrs)));
 
   // Finish off the Attributes and check them
   AttrListPtr PAL = AttrListPtr::get(Attrs);
@@ -3693,7 +3710,10 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
   // Set up the Attributes for the function.
   SmallVector<AttributeWithIndex, 8> Attrs;
   if (RetAttrs.hasAttributes())
-    Attrs.push_back(AttributeWithIndex::get(0, Attributes::get(RetAttrs)));
+    Attrs.push_back(
+      AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+                              Attributes::get(Callee->getContext(),
+                                              RetAttrs)));
 
   SmallVector<Value*, 8> Args;
 
@@ -3721,7 +3741,10 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
     return Error(CallLoc, "not enough parameters specified for call");
 
   if (FnAttrs.hasAttributes())
-    Attrs.push_back(AttributeWithIndex::get(~0, Attributes::get(FnAttrs)));
+    Attrs.push_back(
+      AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+                              Attributes::get(Callee->getContext(),
+                                              FnAttrs)));
 
   // Finish off the Attributes and check them
   AttrListPtr PAL = AttrListPtr::get(Attrs);
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 65b4919e10..d640246db2 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -476,14 +476,15 @@ bool BitcodeReader::ParseAttributeBlock() {
 
       for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
         Attributes ReconstitutedAttr =
-          Attributes::decodeLLVMAttributesForBitcode(Record[i+1]);
+          Attributes::decodeLLVMAttributesForBitcode(Context, Record[i+1]);
         Record[i+1] = ReconstitutedAttr.Raw();
       }
 
       for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
-        if (Attributes(Record[i+1]).hasAttributes())
+        Attributes::Builder B(Record[i+1]);
+        if (B.hasAttributes())
           Attrs.push_back(AttributeWithIndex::get(Record[i],
-                                                  Attributes(Record[i+1])));
+                                                  Attributes::get(Context, B)));
       }
 
       MAttributes.push_back(AttrListPtr::get(Attrs));
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 09e30eba57..110a294020 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -314,8 +314,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
   // the return. Ignore noalias because it doesn't affect the call sequence.
   const Function *F = ExitBB->getParent();
   Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
-  if (Attributes::Builder(CalleeRetAttr ^ CallerRetAttr)
-      .removeAttribute(Attributes::NoAlias).hasAttributes())
+  if (Attributes::Builder(CalleeRetAttr).removeAttribute(Attributes::NoAlias) !=
+      Attributes::Builder(CallerRetAttr).removeAttribute(Attributes::NoAlias))
     return false;
 
   // It's not safe to eliminate the sign / zero extension of the return value.
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index b4f0b174b5..788a89bf13 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -401,7 +401,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
     //   - __tlv_bootstrap - used to make sure support exists
     //   - spare pointer, used when mapped by the runtime
     //   - pointer to mangled symbol above with initializer
-    unsigned PtrSize = TD->getPointerSizeInBits()/8;
+    unsigned AS = GV->getType()->getAddressSpace();
+    unsigned PtrSize = TD->getPointerSizeInBits(AS)/8;
     OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
                           PtrSize, 0);
     OutStreamer.EmitIntValue(0, PtrSize, 0);
@@ -1356,7 +1357,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
 
   // Emit the function pointers in the target-specific order
   const DataLayout *TD = TM.getDataLayout();
-  unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+  unsigned Align = Log2_32(TD->getPointerPrefAlignment(0));
   std::stable_sort(Structors.begin(), Structors.end(), priority_order);
   for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
     const MCSection *OutputSection =
@@ -1537,8 +1538,9 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
     if (Offset == 0)
       return Base;
 
+    unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
     // Truncate/sext the offset to the pointer size.
-    unsigned Width = TD.getPointerSizeInBits();
+    unsigned Width = TD.getPointerSizeInBits(AS);
     if (Width < 64)
       Offset = SignExtend64(Offset, Width);
 
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index d94e1fe61b..6c17af2e8c 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -112,7 +112,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
   
   switch (Encoding & 0x07) {
   default: llvm_unreachable("Invalid encoded value.");
-  case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize();
+  case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(0);
   case dwarf::DW_EH_PE_udata2: return 2;
   case dwarf::DW_EH_PE_udata4: return 4;
   case dwarf::DW_EH_PE_udata8: return 8;
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 4d73b3c222..73e18cd817 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -200,7 +200,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
   case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
   case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
   case dwarf::DW_FORM_addr:
-    Size = Asm->getDataLayout().getPointerSize(); break;
+    Size = Asm->getDataLayout().getPointerSize(0); break;
   default: llvm_unreachable("DIE Value form not supported yet");
   }
   Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
@@ -222,7 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
   case dwarf::DW_FORM_data8: return sizeof(int64_t);
   case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
   case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
-  case dwarf::DW_FORM_addr:  return AP->getDataLayout().getPointerSize();
+  case dwarf::DW_FORM_addr:  return AP->getDataLayout().getPointerSize(0);
   default: llvm_unreachable("DIE Value form not supported yet");
   }
 }
@@ -249,7 +249,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
 unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
-  return AP->getDataLayout().getPointerSize();
+  return AP->getDataLayout().getPointerSize(0);
 }
 
 #ifndef NDEBUG
@@ -273,7 +273,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
 unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
-  return AP->getDataLayout().getPointerSize();
+  return AP->getDataLayout().getPointerSize(0);
 }
 
 #ifndef NDEBUG
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 9982d39641..466dc69da2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -384,7 +384,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU,
     // DW_AT_ranges appropriately.
     TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
                    DebugRangeSymbols.size() 
-                   * Asm->getDataLayout().getPointerSize());
+                   * Asm->getDataLayout().getPointerSize(0));
     for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
          RE = Ranges.end(); RI != RE; ++RI) {
       DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -450,7 +450,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU,
     // DW_AT_ranges appropriately.
     TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
                    DebugRangeSymbols.size() 
-                   * Asm->getDataLayout().getPointerSize());
+                   * Asm->getDataLayout().getPointerSize(0));
     for (SmallVector<InsnRange, 4>::const_iterator RI = Ranges.begin(),
          RE = Ranges.end(); RI != RE; ++RI) {
       DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
@@ -1793,7 +1793,7 @@ void DwarfDebug::emitDebugInfo() {
     Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
                            DwarfAbbrevSectionSym);
     Asm->OutStreamer.AddComment("Address Size (in bytes)");
-    Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+    Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0));
 
     emitDIE(Die);
     Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
@@ -1839,14 +1839,14 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
   Asm->EmitInt8(0);
 
   Asm->OutStreamer.AddComment("Op size");
-  Asm->EmitInt8(Asm->getDataLayout().getPointerSize() + 1);
+  Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0) + 1);
   Asm->OutStreamer.AddComment("DW_LNE_set_address");
   Asm->EmitInt8(dwarf::DW_LNE_set_address);
 
   Asm->OutStreamer.AddComment("Section end label");
 
   Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
-                                   Asm->getDataLayout().getPointerSize(),
+                                   Asm->getDataLayout().getPointerSize(0),
                                    0/*AddrSpace*/);
 
   // Mark end of matrix.
@@ -2075,7 +2075,7 @@ void DwarfDebug::emitDebugLoc() {
   // Start the dwarf loc section.
   Asm->OutStreamer.SwitchSection(
     Asm->getObjFileLowering().getDwarfLocSection());
-  unsigned char Size = Asm->getDataLayout().getPointerSize();
+  unsigned char Size = Asm->getDataLayout().getPointerSize(0);
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
   unsigned index = 1;
   for (SmallVector<DotDebugLocEntry, 4>::iterator
@@ -2172,7 +2172,7 @@ void DwarfDebug::emitDebugRanges() {
   // Start the dwarf ranges section.
   Asm->OutStreamer.SwitchSection(
     Asm->getObjFileLowering().getDwarfRangesSection());
-  unsigned char Size = Asm->getDataLayout().getPointerSize();
+  unsigned char Size = Asm->getDataLayout().getPointerSize(0);
   for (SmallVector<const MCSymbol *, 8>::iterator
          I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
        I != E; ++I) {
@@ -2230,7 +2230,7 @@ void DwarfDebug::emitDebugInlineInfo() {
   Asm->OutStreamer.AddComment("Dwarf Version");
   Asm->EmitInt16(dwarf::DWARF_VERSION);
   Asm->OutStreamer.AddComment("Address Size (in bytes)");
-  Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
+  Asm->EmitInt8(Asm->getDataLayout().getPointerSize(0));
 
   for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
          E = InlinedSPNodes.end(); I != E; ++I) {
@@ -2261,7 +2261,7 @@ void DwarfDebug::emitDebugInlineInfo() {
 
       if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
       Asm->OutStreamer.EmitSymbolValue(LI->first,
-                                       Asm->getDataLayout().getPointerSize(),0);
+                                       Asm->getDataLayout().getPointerSize(0),0);
     }
   }
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 08fb6b3f52..31d07141a1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -417,7 +417,7 @@ void DwarfException::EmitExceptionTable() {
     // that we're omitting that bit.
     TTypeEncoding = dwarf::DW_EH_PE_omit;
     // dwarf::DW_EH_PE_absptr
-    TypeFormatSize = Asm->getDataLayout().getPointerSize();
+    TypeFormatSize = Asm->getDataLayout().getPointerSize(0);
   } else {
     // Okay, we have actual filters or typeinfos to emit.  As such, we need to
     // pick a type encoding for them.  We're about to emit a list of pointers to
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index f7c011968c..d0e27d1d04 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -91,7 +91,7 @@ void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
 /// either condition is detected in a function which uses the GC.
 ///
 void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
-  unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+  unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(0);
 
   AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
   EmitCamlGlobal(getModule(), AP, "code_end");
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 141f8edc83..8daac46954 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -761,38 +761,41 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill,
   LI->removeRange(Kill, MBBEnd);
   if (EndPoints) EndPoints->push_back(MBBEnd);
 
-  // Find all blocks that are reachable from MBB without leaving VNI's live
-  // range.
-  for (df_iterator<MachineBasicBlock*>
-       I = df_begin(KillMBB), E = df_end(KillMBB); I != E;) {
-    MachineBasicBlock *MBB = *I;
-    // KillMBB itself was already handled.
-    if (MBB == KillMBB) {
-      ++I;
-      continue;
-    }
+  // Find all blocks that are reachable from KillMBB without leaving VNI's live
+  // range. It is possible that KillMBB itself is reachable, so start a DFS
+  // from each successor.
+  typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;
+  VisitedTy Visited;
+  for (MachineBasicBlock::succ_iterator
+       SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
+       SuccI != SuccE; ++SuccI) {
+    for (df_ext_iterator<MachineBasicBlock*, VisitedTy>
+         I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited);
+         I != E;) {
+      MachineBasicBlock *MBB = *I;
+
+      // Check if VNI is live in to MBB.
+      tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
+      LiveRangeQuery LRQ(*LI, MBBStart);
+      if (LRQ.valueIn() != VNI) {
+        // This block isn't part of the VNI live range. Prune the search.
+        I.skipChildren();
+        continue;
+      }
 
-    // Check if VNI is live in to MBB.
-    tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB);
-    LiveRangeQuery LRQ(*LI, MBBStart);
-    if (LRQ.valueIn() != VNI) {
-      // This block isn't part of the VNI live range. Prune the search.
-      I.skipChildren();
-      continue;
-    }
+      // Prune the search if VNI is killed in MBB.
+      if (LRQ.endPoint() < MBBEnd) {
+        LI->removeRange(MBBStart, LRQ.endPoint());
+        if (EndPoints) EndPoints->push_back(LRQ.endPoint());
+        I.skipChildren();
+        continue;
+      }
 
-    // Prune the search if VNI is killed in MBB.
-    if (LRQ.endPoint() < MBBEnd) {
-      LI->removeRange(MBBStart, LRQ.endPoint());
-      if (EndPoints) EndPoints->push_back(LRQ.endPoint());
-      I.skipChildren();
-      continue;
+      // VNI is live through MBB.
+      LI->removeRange(MBBStart, MBBEnd);
+      if (EndPoints) EndPoints->push_back(MBBEnd);
+      ++I;
     }
-
-    // VNI is live through MBB.
-    LI->removeRange(MBBStart, MBBEnd);
-    if (EndPoints) EndPoints->push_back(MBBEnd);
-    ++I;
   }
 }
 
@@ -1007,246 +1010,240 @@ private:
   LiveIntervals& LIS;
   const MachineRegisterInfo& MRI;
   const TargetRegisterInfo& TRI;
+  SlotIndex OldIdx;
   SlotIndex NewIdx;
-
-  typedef std::pair<LiveInterval*, LiveRange*> IntRangePair;
-  typedef DenseSet<IntRangePair> RangeSet;
-
-  struct RegRanges {
-    LiveRange* Use;
-    LiveRange* EC;
-    LiveRange* Dead;
-    LiveRange* Def;
-    RegRanges() : Use(0), EC(0), Dead(0), Def(0) {}
-  };
-  typedef DenseMap<unsigned, RegRanges> BundleRanges;
+  SmallPtrSet<LiveInterval*, 8> Updated;
 
 public:
   HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI,
-           const TargetRegisterInfo& TRI, SlotIndex NewIdx)
-    : LIS(LIS), MRI(MRI), TRI(TRI), NewIdx(NewIdx) {}
-
-  // Update intervals for all operands of MI from OldIdx to NewIdx.
-  // This assumes that MI used to be at OldIdx, and now resides at
-  // NewIdx.
-  void moveAllRangesFrom(MachineInstr* MI, SlotIndex OldIdx) {
-    assert(NewIdx != OldIdx && "No-op move? That's a bit strange.");
-
-    // Collect the operands.
-    RangeSet Entering, Internal, Exiting;
-    bool hasRegMaskOp = false;
-    collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
-
-    // To keep the LiveRanges valid within an interval, move the ranges closest
-    // to the destination first. This prevents ranges from overlapping, to that
-    // APIs like removeRange still work.
-    if (NewIdx < OldIdx) {
-      moveAllEnteringFrom(OldIdx, Entering);
-      moveAllInternalFrom(OldIdx, Internal);
-      moveAllExitingFrom(OldIdx, Exiting);
-    }
-    else {
-      moveAllExitingFrom(OldIdx, Exiting);
-      moveAllInternalFrom(OldIdx, Internal);
-      moveAllEnteringFrom(OldIdx, Entering);
-    }
-
-    if (hasRegMaskOp)
-      updateRegMaskSlots(OldIdx);
-
-#ifndef NDEBUG
-    LIValidator validator;
-    validator = std::for_each(Entering.begin(), Entering.end(), validator);
-    validator = std::for_each(Internal.begin(), Internal.end(), validator);
-    validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
-    assert(validator.rangesOk() && "moveAllOperandsFrom broke liveness.");
-#endif
-
-  }
-
-  // Update intervals for all operands of MI to refer to BundleStart's
-  // SlotIndex.
-  void moveAllRangesInto(MachineInstr* MI, MachineInstr* BundleStart) {
-    if (MI == BundleStart)
-      return; // Bundling instr with itself - nothing to do.
-
-    SlotIndex OldIdx = LIS.getSlotIndexes()->getInstructionIndex(MI);
-    assert(LIS.getSlotIndexes()->getInstructionFromIndex(OldIdx) == MI &&
-           "SlotIndex <-> Instruction mapping broken for MI");
-
-    // Collect all ranges already in the bundle.
-    MachineBasicBlock::instr_iterator BII(BundleStart);
-    RangeSet Entering, Internal, Exiting;
-    bool hasRegMaskOp = false;
-    collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
-    assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
-    for (++BII; &*BII == MI || BII->isInsideBundle(); ++BII) {
-      if (&*BII == MI)
+           const TargetRegisterInfo& TRI,
+           SlotIndex OldIdx, SlotIndex NewIdx)
+    : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx) {}
+
+  /// Update all live ranges touched by MI, assuming a move from OldIdx to
+  /// NewIdx.
+  void updateAllRanges(MachineInstr *MI) {
+    DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI);
+    bool hasRegMask = false;
+    for (MIOperands MO(MI); MO.isValid(); ++MO) {
+      if (MO->isRegMask())
+        hasRegMask = true;
+      if (!MO->isReg())
         continue;
-      collectRanges(BII, Entering, Internal, Exiting, hasRegMaskOp, NewIdx);
-      assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
-    }
-
-    BundleRanges BR = createBundleRanges(Entering, Internal, Exiting);
-
-    Entering.clear();
-    Internal.clear();
-    Exiting.clear();
-    collectRanges(MI, Entering, Internal, Exiting, hasRegMaskOp, OldIdx);
-    assert(!hasRegMaskOp && "Can't have RegMask operand in bundle.");
-
-    DEBUG(dbgs() << "Entering: " << Entering.size() << "\n");
-    DEBUG(dbgs() << "Internal: " << Internal.size() << "\n");
-    DEBUG(dbgs() << "Exiting: " << Exiting.size() << "\n");
-
-    moveAllEnteringFromInto(OldIdx, Entering, BR);
-    moveAllInternalFromInto(OldIdx, Internal, BR);
-    moveAllExitingFromInto(OldIdx, Exiting, BR);
+      // Aggressively clear all kill flags.
+      // They are reinserted by VirtRegRewriter.
+      if (MO->isUse())
+        MO->setIsKill(false);
 
+      unsigned Reg = MO->getReg();
+      if (!Reg)
+        continue;
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        updateRange(LIS.getInterval(Reg));
+        continue;
+      }
 
-#ifndef NDEBUG
-    LIValidator validator;
-    validator = std::for_each(Entering.begin(), Entering.end(), validator);
-    validator = std::for_each(Internal.begin(), Internal.end(), validator);
-    validator = std::for_each(Exiting.begin(), Exiting.end(), validator);
-    assert(validator.rangesOk() && "moveAllOperandsInto broke liveness.");
-#endif
+      // For physregs, only update the regunits that actually have a
+      // precomputed live range.
+      for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
+        if (LiveInterval *LI = LIS.getCachedRegUnit(*Units))
+          updateRange(*LI);
+    }
+    if (hasRegMask)
+      updateRegMaskSlots();
   }
 
 private:
+  /// Update a single live range, assuming an instruction has been moved from
+  /// OldIdx to NewIdx.
+  void updateRange(LiveInterval &LI) {
+    if (!Updated.insert(&LI))
+      return;
+    DEBUG({
+      dbgs() << "     ";
+      if (TargetRegisterInfo::isVirtualRegister(LI.reg))
+        dbgs() << PrintReg(LI.reg);
+      else
+        dbgs() << PrintRegUnit(LI.reg, &TRI);
+      dbgs() << ":\t" << LI << '\n';
+    });
+    if (SlotIndex::isEarlierInstr(OldIdx, NewIdx))
+      handleMoveDown(LI);
+    else
+      handleMoveUp(LI);
+    DEBUG(dbgs() << "        -->\t" << LI << '\n');
+    LI.verify();
+  }
+
+  /// Update LI to reflect an instruction has been moved downwards from OldIdx
+  /// to NewIdx.
+  ///
+  /// 1. Live def at OldIdx:
+  ///    Move def to NewIdx, assert endpoint after NewIdx.
+  ///
+  /// 2. Live def at OldIdx, killed at NewIdx:
+  ///    Change to dead def at NewIdx.
+  ///    (Happens when bundling def+kill together).
+  ///
+  /// 3. Dead def at OldIdx:
+  ///    Move def to NewIdx, possibly across another live value.
+  ///
+  /// 4. Def at OldIdx AND at NewIdx:
+  ///    Remove live range [OldIdx;NewIdx) and value defined at OldIdx.
+  ///    (Happens when bundling multiple defs together).
+  ///
+  /// 5. Value read at OldIdx, killed before NewIdx:
+  ///    Extend kill to NewIdx.
+  ///
+  void handleMoveDown(LiveInterval &LI) {
+    // First look for a kill at OldIdx.
+    LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+    LiveInterval::iterator E = LI.end();
+    // Is LI even live at OldIdx?
+    if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+      return;
 
-#ifndef NDEBUG
-  class LIValidator {
-  private:
-    DenseSet<const LiveInterval*> Checked, Bogus;
-  public:
-    void operator()(const IntRangePair& P) {
-      const LiveInterval* LI = P.first;
-      if (Checked.count(LI))
+    // Handle a live-in value.
+    if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+      bool isKill = SlotIndex::isSameInstr(OldIdx, I->end);
+      // If the live-in value already extends to NewIdx, there is nothing to do.
+      if (!SlotIndex::isEarlierInstr(I->end, NewIdx))
         return;
-      Checked.insert(LI);
-      if (LI->empty())
+      // Aggressively remove all kill flags from the old kill point.
+      // Kill flags shouldn't be used while live intervals exist, they will be
+      // reinserted by VirtRegRewriter.
+      if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end))
+        for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO)
+          if (MO->isReg() && MO->isUse())
+            MO->setIsKill(false);
+      // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by
+      // overlapping ranges. Case 5 above.
+      I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+      // If this was a kill, there may also be a def. Otherwise we're done.
+      if (!isKill)
         return;
-      SlotIndex LastEnd = LI->begin()->start;
-      for (LiveInterval::const_iterator LRI = LI->begin(), LRE = LI->end();
-           LRI != LRE; ++LRI) {
-        const LiveRange& LR = *LRI;
-        if (LastEnd > LR.start || LR.start >= LR.end)
-          Bogus.insert(LI);
-        LastEnd = LR.end;
-      }
-    }
-
-    bool rangesOk() const {
-      return Bogus.empty();
-    }
-  };
-#endif
-
-  // Collect IntRangePairs for all operands of MI that may need fixing.
-  // Treat's MI's index as OldIdx (regardless of what it is in SlotIndexes'
-  // maps).
-  void collectRanges(MachineInstr* MI, RangeSet& Entering, RangeSet& Internal,
-                     RangeSet& Exiting, bool& hasRegMaskOp, SlotIndex OldIdx) {
-    hasRegMaskOp = false;
-    for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
-                                    MOE = MI->operands_end();
-         MOI != MOE; ++MOI) {
-      const MachineOperand& MO = *MOI;
-
-      if (MO.isRegMask()) {
-        hasRegMaskOp = true;
-        continue;
-      }
-
-      if (!MO.isReg() || MO.getReg() == 0)
-        continue;
-
-      unsigned Reg = MO.getReg();
-
-      // Don't track uses of reserved registers - they're not accurate.
-      // Reserved register live ranges look like a set of dead defs.
-      bool Resv =
-        TargetRegisterInfo::isPhysicalRegister(Reg) && LIS.isReserved(Reg);
-
-      // Collect ranges for register units. These live ranges are computed on
-      // demand, so just skip any that haven't been computed yet.
-      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
-        for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
-          if (LiveInterval *LI = LIS.getCachedRegUnit(*Units))
-            collectRanges(MO, LI, Entering, Internal, Exiting, OldIdx, Resv);
-      } else {
-        // Collect ranges for individual virtual registers.
-        collectRanges(MO, &LIS.getInterval(Reg),
-                      Entering, Internal, Exiting, OldIdx);
-      }
+      ++I;
     }
-  }
 
-  void collectRanges(const MachineOperand &MO, LiveInterval *LI,
-                     RangeSet &Entering, RangeSet &Internal, RangeSet &Exiting,
-                     SlotIndex OldIdx, bool IgnoreReads = false) {
-    if (!IgnoreReads && MO.readsReg()) {
-      LiveRange* LR = LI->getLiveRangeContaining(OldIdx);
-      if (LR != 0)
-        Entering.insert(std::make_pair(LI, LR));
+    // Check for a def at OldIdx.
+    if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start))
+      return;
+    // We have a def at OldIdx.
+    VNInfo *DefVNI = I->valno;
+    assert(DefVNI->def == I->start && "Inconsistent def");
+    DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+    // If the defined value extends beyond NewIdx, just move the def down.
+    // This is case 1 above.
+    if (SlotIndex::isEarlierInstr(NewIdx, I->end)) {
+      I->start = DefVNI->def;
+      return;
     }
-    if (MO.isDef()) {
-      LiveRange* LR = LI->getLiveRangeContaining(OldIdx.getRegSlot());
-      assert(LR != 0 && "No live range for def?");
-      if (LR->end > OldIdx.getDeadSlot())
-        Exiting.insert(std::make_pair(LI, LR));
-      else
-        Internal.insert(std::make_pair(LI, LR));
+    // The remaining possibilities are now:
+    // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx).
+    // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot().
+    // In either case, it is possible that there is an existing def at NewIdx.
+    assert((I->end == OldIdx.getDeadSlot() ||
+            SlotIndex::isSameInstr(I->end, NewIdx)) &&
+            "Cannot move def below kill");
+    LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot());
+    if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+      // There is an existing def at NewIdx, case 4 above. The def at OldIdx is
+      // coalesced into that value.
+      assert(NewI->valno != DefVNI && "Multiple defs of value?");
+      LI.removeValNo(DefVNI);
+      return;
     }
+    // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx.
+    // If the def at OldIdx was dead, we allow it to be moved across other LI
+    // values. The new range should be placed immediately before NewI, move any
+    // intermediate ranges up.
+    assert(NewI != I && "Inconsistent iterators");
+    std::copy(llvm::next(I), NewI, I);
+    *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
   }
 
-  BundleRanges createBundleRanges(RangeSet& Entering,
-                                  RangeSet& Internal,
-                                  RangeSet& Exiting) {
-    BundleRanges BR;
+  /// Update LI to reflect an instruction has been moved upwards from OldIdx
+  /// to NewIdx.
+  ///
+  /// 1. Live def at OldIdx:
+  ///    Hoist def to NewIdx.
+  ///
+  /// 2. Dead def at OldIdx:
+  ///    Hoist def+end to NewIdx, possibly move across other values.
+  ///
+  /// 3. Dead def at OldIdx AND existing def at NewIdx:
+  ///    Remove value defined at OldIdx, coalescing it with existing value.
+  ///
+  /// 4. Live def at OldIdx AND existing def at NewIdx:
+  ///    Remove value defined at NewIdx, hoist OldIdx def to NewIdx.
+  ///    (Happens when bundling multiple defs together).
+  ///
+  /// 5. Value killed at OldIdx:
+  ///    Hoist kill to NewIdx, then scan for last kill between NewIdx and
+  ///    OldIdx.
+  ///
+  void handleMoveUp(LiveInterval &LI) {
+    // First look for a kill at OldIdx.
+    LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex());
+    LiveInterval::iterator E = LI.end();
+    // Is LI even live at OldIdx?
+    if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start))
+      return;
 
-    for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
-         EI != EE; ++EI) {
-      LiveInterval* LI = EI->first;
-      LiveRange* LR = EI->second;
-      BR[LI->reg].Use = LR;
+    // Handle a live-in value.
+    if (!SlotIndex::isSameInstr(I->start, OldIdx)) {
+      // If the live-in value isn't killed here, there is nothing to do.
+      if (!SlotIndex::isSameInstr(OldIdx, I->end))
+        return;
+      // Adjust I->end to end at NewIdx. If we are hoisting a kill above
+      // another use, we need to search for that use. Case 5 above.
+      I->end = NewIdx.getRegSlot(I->end.isEarlyClobber());
+      ++I;
+      // If OldIdx also defines a value, there couldn't have been another use.
+      if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) {
+        // No def, search for the new kill.
+        // This can never be an early clobber kill since there is no def.
+        llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot();
+        return;
+      }
     }
 
-    for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
-         II != IE; ++II) {
-      LiveInterval* LI = II->first;
-      LiveRange* LR = II->second;
-      if (LR->end.isDead()) {
-        BR[LI->reg].Dead = LR;
-      } else {
-        BR[LI->reg].EC = LR;
+    // Now deal with the def at OldIdx.
+    assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?");
+    VNInfo *DefVNI = I->valno;
+    assert(DefVNI->def == I->start && "Inconsistent def");
+    DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber());
+
+    // Check for an existing def at NewIdx.
+    LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot());
+    if (SlotIndex::isSameInstr(NewI->start, NewIdx)) {
+      assert(NewI->valno != DefVNI && "Same value defined more than once?");
+      // There is an existing def at NewIdx.
+      if (I->end.isDead()) {
+        // Case 3: Remove the dead def at OldIdx.
+        LI.removeValNo(DefVNI);
+        return;
       }
+      // Case 4: Replace def at NewIdx with live def at OldIdx.
+      I->start = DefVNI->def;
+      LI.removeValNo(NewI->valno);
+      return;
     }
 
-    for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
-         EI != EE; ++EI) {
-      LiveInterval* LI = EI->first;
-      LiveRange* LR = EI->second;
-      BR[LI->reg].Def = LR;
+    // There is no existing def at NewIdx. Hoist DefVNI.
+    if (!I->end.isDead()) {
+      // Leave the end point of a live def.
+      I->start = DefVNI->def;
+      return;
     }
 
-    return BR;
-  }
-
-  void moveKillFlags(unsigned reg, SlotIndex OldIdx, SlotIndex newKillIdx) {
-    MachineInstr* OldKillMI = LIS.getInstructionFromIndex(OldIdx);
-    if (!OldKillMI->killsRegister(reg))
-      return; // Bail out if we don't have kill flags on the old register.
-    MachineInstr* NewKillMI = LIS.getInstructionFromIndex(newKillIdx);
-    assert(OldKillMI->killsRegister(reg) && "Old 'kill' instr isn't a kill.");
-    assert(!NewKillMI->killsRegister(reg) &&
-           "New kill instr is already a kill.");
-    OldKillMI->clearRegisterKills(reg, &TRI);
-    NewKillMI->addRegisterKilled(reg, &TRI);
+    // DefVNI is a dead def. It may have been moved across other values in LI,
+    // so move I up to NewI. Slide [NewI;I) down one position.
+    std::copy_backward(NewI, I, llvm::next(I));
+    *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI);
   }
 
-  void updateRegMaskSlots(SlotIndex OldIdx) {
+  void updateRegMaskSlots() {
     SmallVectorImpl<SlotIndex>::iterator RI =
       std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
                        OldIdx);
@@ -1257,7 +1254,7 @@ private:
   }
 
   // Return the last use of reg between NewIdx and OldIdx.
-  SlotIndex findLastUseBefore(unsigned Reg, SlotIndex OldIdx) {
+  SlotIndex findLastUseBefore(unsigned Reg) {
     SlotIndex LastUse = NewIdx;
 
     if (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -1291,233 +1288,25 @@ private:
     }
     return LastUse;
   }
-
-  void moveEnteringUpFrom(SlotIndex OldIdx, IntRangePair& P) {
-    LiveInterval* LI = P.first;
-    LiveRange* LR = P.second;
-    bool LiveThrough = LR->end > OldIdx.getRegSlot();
-    if (LiveThrough)
-      return;
-    SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
-    if (LastUse != NewIdx)
-      moveKillFlags(LI->reg, NewIdx, LastUse);
-    LR->end = LastUse.getRegSlot(LR->end.isEarlyClobber());
-  }
-
-  void moveEnteringDownFrom(SlotIndex OldIdx, IntRangePair& P) {
-    LiveInterval* LI = P.first;
-    LiveRange* LR = P.second;
-    // Extend the LiveRange if NewIdx is past the end.
-    if (NewIdx > LR->end) {
-      // Move kill flags if OldIdx was not originally the end
-      // (otherwise LR->end points to an invalid slot).
-      if (LR->end.getRegSlot() != OldIdx.getRegSlot()) {
-        assert(LR->end > OldIdx && "LiveRange does not cover original slot");
-        moveKillFlags(LI->reg, LR->end, NewIdx);
-      }
-      LR->end = NewIdx.getRegSlot(LR->end.isEarlyClobber());
-    }
-  }
-
-  void moveAllEnteringFrom(SlotIndex OldIdx, RangeSet& Entering) {
-    bool GoingUp = NewIdx < OldIdx;
-
-    if (GoingUp) {
-      for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
-           EI != EE; ++EI)
-        moveEnteringUpFrom(OldIdx, *EI);
-    } else {
-      for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
-           EI != EE; ++EI)
-        moveEnteringDownFrom(OldIdx, *EI);
-    }
-  }
-
-  void moveInternalFrom(SlotIndex OldIdx, IntRangePair& P) {
-    LiveInterval* LI = P.first;
-    LiveRange* LR = P.second;
-    assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
-           LR->end <= OldIdx.getDeadSlot() &&
-           "Range should be internal to OldIdx.");
-    LiveRange Tmp(*LR);
-    Tmp.start = NewIdx.getRegSlot(LR->start.isEarlyClobber());
-    Tmp.valno->def = Tmp.start;
-    Tmp.end = LR->end.isDead() ? NewIdx.getDeadSlot() : NewIdx.getRegSlot();
-    LI->removeRange(*LR);
-    LI->addRange(Tmp);
-  }
-
-  void moveAllInternalFrom(SlotIndex OldIdx, RangeSet& Internal) {
-    for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
-         II != IE; ++II)
-      moveInternalFrom(OldIdx, *II);
-  }
-
-  void moveExitingFrom(SlotIndex OldIdx, IntRangePair& P) {
-    LiveRange* LR = P.second;
-    assert(OldIdx < LR->start && LR->start < OldIdx.getDeadSlot() &&
-           "Range should start in OldIdx.");
-    assert(LR->end > OldIdx.getDeadSlot() && "Range should exit OldIdx.");
-    SlotIndex NewStart = NewIdx.getRegSlot(LR->start.isEarlyClobber());
-    LR->start = NewStart;
-    LR->valno->def = NewStart;
-  }
-
-  void moveAllExitingFrom(SlotIndex OldIdx, RangeSet& Exiting) {
-    for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
-         EI != EE; ++EI)
-      moveExitingFrom(OldIdx, *EI);
-  }
-
-  void moveEnteringUpFromInto(SlotIndex OldIdx, IntRangePair& P,
-                              BundleRanges& BR) {
-    LiveInterval* LI = P.first;
-    LiveRange* LR = P.second;
-    bool LiveThrough = LR->end > OldIdx.getRegSlot();
-    if (LiveThrough) {
-      assert((LR->start < NewIdx || BR[LI->reg].Def == LR) &&
-             "Def in bundle should be def range.");
-      assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
-             "If bundle has use for this reg it should be LR.");
-      BR[LI->reg].Use = LR;
-      return;
-    }
-
-    SlotIndex LastUse = findLastUseBefore(LI->reg, OldIdx);
-    moveKillFlags(LI->reg, OldIdx, LastUse);
-
-    if (LR->start < NewIdx) {
-      // Becoming a new entering range.
-      assert(BR[LI->reg].Dead == 0 && BR[LI->reg].Def == 0 &&
-             "Bundle shouldn't be re-defining reg mid-range.");
-      assert((BR[LI->reg].Use == 0 || BR[LI->reg].Use == LR) &&
-             "Bundle shouldn't have different use range for same reg.");
-      LR->end = LastUse.getRegSlot();
-      BR[LI->reg].Use = LR;
-    } else {
-      // Becoming a new Dead-def.
-      assert(LR->start == NewIdx.getRegSlot(LR->start.isEarlyClobber()) &&
-             "Live range starting at unexpected slot.");
-      assert(BR[LI->reg].Def == LR && "Reg should have def range.");
-      assert(BR[LI->reg].Dead == 0 &&
-               "Can't have def and dead def of same reg in a bundle.");
-      LR->end = LastUse.getDeadSlot();
-      BR[LI->reg].Dead = BR[LI->reg].Def;
-      BR[LI->reg].Def = 0;
-    }
-  }
-
-  void moveEnteringDownFromInto(SlotIndex OldIdx, IntRangePair& P,
-                                BundleRanges& BR) {
-    LiveInterval* LI = P.first;
-    LiveRange* LR = P.second;
-    if (NewIdx > LR->end) {
-      // Range extended to bundle. Add to bundle uses.
-      // Note: Currently adds kill flags to bundle start.
-      assert(BR[LI->reg].Use == 0 &&
-             "Bundle already has use range for reg.");
-      moveKillFlags(LI->reg, LR->end, NewIdx);
-      LR->end = NewIdx.getRegSlot();
-      BR[LI->reg].Use = LR;
-    } else {
-      assert(BR[LI->reg].Use != 0 &&
-             "Bundle should already have a use range for reg.");
-    }
-  }
-
-  void moveAllEnteringFromInto(SlotIndex OldIdx, RangeSet& Entering,
-                               BundleRanges& BR) {
-    bool GoingUp = NewIdx < OldIdx;
-
-    if (GoingUp) {
-      for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
-           EI != EE; ++EI)
-        moveEnteringUpFromInto(OldIdx, *EI, BR);
-    } else {
-      for (RangeSet::iterator EI = Entering.begin(), EE = Entering.end();
-           EI != EE; ++EI)
-        moveEnteringDownFromInto(OldIdx, *EI, BR);
-    }
-  }
-
-  void moveInternalFromInto(SlotIndex OldIdx, IntRangePair& P,
-                            BundleRanges& BR) {
-    // TODO: Sane rules for moving ranges into bundles.
-  }
-
-  void moveAllInternalFromInto(SlotIndex OldIdx, RangeSet& Internal,
-                               BundleRanges& BR) {
-    for (RangeSet::iterator II = Internal.begin(), IE = Internal.end();
-         II != IE; ++II)
-      moveInternalFromInto(OldIdx, *II, BR);
-  }
-
-  void moveExitingFromInto(SlotIndex OldIdx, IntRangePair& P,
-                           BundleRanges& BR) {
-    LiveInterval* LI = P.first;
-    LiveRange* LR = P.second;
-
-    assert(LR->start.isRegister() &&
-           "Don't know how to merge exiting ECs into bundles yet.");
-
-    if (LR->end > NewIdx.getDeadSlot()) {
-      // This range is becoming an exiting range on the bundle.
-      // If there was an old dead-def of this reg, delete it.
-      if (BR[LI->reg].Dead != 0) {
-        LI->removeRange(*BR[LI->reg].Dead);
-        BR[LI->reg].Dead = 0;
-      }
-      assert(BR[LI->reg].Def == 0 &&
-             "Can't have two defs for the same variable exiting a bundle.");
-      LR->start = NewIdx.getRegSlot();
-      LR->valno->def = LR->start;
-      BR[LI->reg].Def = LR;
-    } else {
-      // This range is becoming internal to the bundle.
-      assert(LR->end == NewIdx.getRegSlot() &&
-             "Can't bundle def whose kill is before the bundle");
-      if (BR[LI->reg].Dead || BR[LI->reg].Def) {
-        // Already have a def for this. Just delete range.
-        LI->removeRange(*LR);
-      } else {
-        // Make range dead, record.
-        LR->end = NewIdx.getDeadSlot();
-        BR[LI->reg].Dead = LR;
-        assert(BR[LI->reg].Use == LR &&
-               "Range becoming dead should currently be use.");
-      }
-      // In both cases the range is no longer a use on the bundle.
-      BR[LI->reg].Use = 0;
-    }
-  }
-
-  void moveAllExitingFromInto(SlotIndex OldIdx, RangeSet& Exiting,
-                              BundleRanges& BR) {
-    for (RangeSet::iterator EI = Exiting.begin(), EE = Exiting.end();
-         EI != EE; ++EI)
-      moveExitingFromInto(OldIdx, *EI, BR);
-  }
-
 };
 
 void LiveIntervals::handleMove(MachineInstr* MI) {
+  assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
   SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
   Indexes->removeMachineInstrFromMaps(MI);
-  SlotIndex NewIndex = MI->isInsideBundle() ?
-                        Indexes->getInstructionIndex(MI) :
-                        Indexes->insertMachineInstrInMaps(MI);
+  SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
   assert(getMBBStartIdx(MI->getParent()) <= OldIndex &&
          OldIndex < getMBBEndIdx(MI->getParent()) &&
          "Cannot handle moves across basic block boundaries.");
-  assert(!MI->isBundled() && "Can't handle bundled instructions yet.");
 
-  HMEditor HME(*this, *MRI, *TRI, NewIndex);
-  HME.moveAllRangesFrom(MI, OldIndex);
+  HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex);
+  HME.updateAllRanges(MI);
 }
 
 void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI,
                                          MachineInstr* BundleStart) {
+  SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
   SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
-  HMEditor HME(*this, *MRI, *TRI, NewIndex);
-  HME.moveAllRangesInto(MI, BundleStart);
+  HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex);
+  HME.updateAllRanges(MI);
 }
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 91d5211857..f11785070b 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -550,7 +550,7 @@ unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const {
   // address of a block, in which case it is the pointer size.
   switch (getEntryKind()) {
   case MachineJumpTableInfo::EK_BlockAddress:
-    return TD.getPointerSize();
+    return TD.getPointerSize(0);
   case MachineJumpTableInfo::EK_GPRel64BlockAddress:
     return 8;
   case MachineJumpTableInfo::EK_GPRel32BlockAddress:
@@ -570,7 +570,7 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
   // alignment.
   switch (getEntryKind()) {
   case MachineJumpTableInfo::EK_BlockAddress:
-    return TD.getPointerABIAlignment();
+    return TD.getPointerABIAlignment(0);
   case MachineJumpTableInfo::EK_GPRel64BlockAddress:
     return TD.getABIIntegerTypeAlignment(64);
   case MachineJumpTableInfo::EK_GPRel32BlockAddress:
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 11a7d4760c..74190e9354 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/ScheduleDAGILP.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Support/CommandLine.h"
@@ -451,26 +452,6 @@ updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
   }
 }
 
-// Release all DAG roots for scheduling.
-void ScheduleDAGMI::releaseRoots() {
-  SmallVector<SUnit*, 16> BotRoots;
-
-  for (std::vector<SUnit>::iterator
-         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
-    // A SUnit is ready to top schedule if it has no predecessors.
-    if (I->Preds.empty())
-      SchedImpl->releaseTopNode(&(*I));
-    // A SUnit is ready to bottom schedule if it has no successors.
-    if (I->Succs.empty())
-      BotRoots.push_back(&(*I));
-  }
-  // Release bottom roots in reverse order so the higher priority nodes appear
-  // first. This is more natural and slightly more efficient.
-  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
-         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
-    SchedImpl->releaseBottomNode(*I);
-}
-
 /// schedule - Called back from MachineScheduler::runOnMachineFunction
 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
 /// only includes instructions that have DAG nodes, not scheduling boundaries.
@@ -532,8 +513,29 @@ void ScheduleDAGMI::postprocessDAG() {
   }
 }
 
+// Release all DAG roots for scheduling.
+void ScheduleDAGMI::releaseRoots() {
+  SmallVector<SUnit*, 16> BotRoots;
+
+  for (std::vector<SUnit>::iterator
+         I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
+    // A SUnit is ready to top schedule if it has no predecessors.
+    if (I->Preds.empty())
+      SchedImpl->releaseTopNode(&(*I));
+    // A SUnit is ready to bottom schedule if it has no successors.
+    if (I->Succs.empty())
+      BotRoots.push_back(&(*I));
+  }
+  // Release bottom roots in reverse order so the higher priority nodes appear
+  // first. This is more natural and slightly more efficient.
+  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
+    SchedImpl->releaseBottomNode(*I);
+}
+
 /// Identify DAG roots and setup scheduler queues.
 void ScheduleDAGMI::initQueues() {
+
   // Initialize the strategy before modifying the DAG.
   SchedImpl->initialize(this);
 
@@ -544,6 +546,8 @@ void ScheduleDAGMI::initQueues() {
   // Release all DAG roots for scheduling.
   releaseRoots();
 
+  SchedImpl->registerRoots();
+
   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
   CurrentBottom = RegionEnd;
 }
@@ -1198,6 +1202,86 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.",
                         createConvergingSched);
 
 //===----------------------------------------------------------------------===//
+// ILP Scheduler. Currently for experimental analysis of heuristics.
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// \brief Order nodes by the ILP metric.
+struct ILPOrder {
+  ScheduleDAGILP *ILP;
+  bool MaximizeILP;
+
+  ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {}
+
+  /// \brief Apply a less-than relation on node priority.
+  bool operator()(const SUnit *A, const SUnit *B) const {
+    // Return true if A comes after B in the Q.
+    if (MaximizeILP)
+      return ILP->getILP(A) < ILP->getILP(B);
+    else
+      return ILP->getILP(A) > ILP->getILP(B);
+  }
+};
+
+/// \brief Schedule based on the ILP metric.
+class ILPScheduler : public MachineSchedStrategy {
+  ScheduleDAGILP ILP;
+  ILPOrder Cmp;
+
+  std::vector<SUnit*> ReadyQ;
+public:
+  ILPScheduler(bool MaximizeILP)
+  : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {}
+
+  virtual void initialize(ScheduleDAGMI *DAG) {
+    ReadyQ.clear();
+    ILP.resize(DAG->SUnits.size());
+  }
+
+  virtual void registerRoots() {
+    for (std::vector<SUnit*>::const_iterator
+           I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) {
+      ILP.computeILP(*I);
+    }
+  }
+
+  /// Implement MachineSchedStrategy interface.
+  /// -----------------------------------------
+
+  virtual SUnit *pickNode(bool &IsTopNode) {
+    if (ReadyQ.empty()) return NULL;
+    pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+    SUnit *SU = ReadyQ.back();
+    ReadyQ.pop_back();
+    IsTopNode = false;
+    DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr()
+          << " ILP: " << ILP.getILP(SU) << '\n');
+    return SU;
+  }
+
+  virtual void schedNode(SUnit *, bool) {}
+
+  virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
+
+  virtual void releaseBottomNode(SUnit *SU) {
+    ReadyQ.push_back(SU);
+    std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+  }
+};
+} // namespace
+
+static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
+  return new ScheduleDAGMI(C, new ILPScheduler(true));
+}
+static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
+  return new ScheduleDAGMI(C, new ILPScheduler(false));
+}
+static MachineSchedRegistry ILPMaxRegistry(
+  "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
+static MachineSchedRegistry ILPMinRegistry(
+  "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
+
+//===----------------------------------------------------------------------===//
 // Machine Instruction Shuffler for Correctness Testing
 //===----------------------------------------------------------------------===//
 
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 9099862bd3..a795ac8448 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -527,6 +527,11 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
         SeenMoveImm = true;
       } else {
         Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
+        // optimizeExtInstr might have created new instructions after MI
+        // and before the already incremented MII. Adjust MII so that the
+        // next iteration sees the new instructions.
+        MII = MI;
+        ++MII;
         if (SeenMoveImm)
           Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
       }
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 1b46256baf..ad515c1064 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1241,6 +1241,9 @@ class JoinVals {
     // Value in the other live range that overlaps this def, if any.
     VNInfo *OtherVNI;
 
+    // Is this value an IMPLICIT_DEF?
+    bool IsImplicitDef;
+
     // True when the live range of this value will be pruned because of an
     // overlapping CR_Replace value in the other live range.
     bool Pruned;
@@ -1249,7 +1252,8 @@ class JoinVals {
     bool PrunedComputed;
 
     Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
-            RedefVNI(0), OtherVNI(0), Pruned(false), PrunedComputed(false) {}
+            RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false),
+            PrunedComputed(false) {}
 
     bool isAnalyzed() const { return WriteLanes != 0; }
   };
@@ -1385,8 +1389,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
     }
 
     // An IMPLICIT_DEF writes undef values.
-    if (DefMI->isImplicitDef())
+    if (DefMI->isImplicitDef()) {
+      V.IsImplicitDef = true;
       V.ValidLanes &= ~V.WriteLanes;
+    }
   }
 
   // Find the value in Other that overlaps VNI->def, if any.
@@ -1724,22 +1730,34 @@ void JoinVals::pruneValues(JoinVals &Other,
     switch (Vals[i].Resolution) {
     case CR_Keep:
       break;
-    case CR_Replace:
+    case CR_Replace: {
       // This value takes precedence over the value in Other.LI.
       LIS->pruneValue(&Other.LI, Def, &EndPoints);
-      // Remove <def,read-undef> flags. This def is now a partial redef.
+      // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
+      // instructions are only inserted to provide a live-out value for PHI
+      // predecessors, so the instruction should simply go away once its value
+      // has been replaced.
+      Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
+      bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep;
       if (!Def.isBlock()) {
+        // Remove <def,read-undef> flags. This def is now a partial redef.
+        // Also remove <def,dead> flags since the joined live range will
+        // continue past this instruction.
         for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
              MO.isValid(); ++MO)
-          if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg)
-            MO->setIsUndef(false);
-	// This value will reach instructions below, but we need to make sure
-	// the live range also reaches the instruction at Def.
-	EndPoints.push_back(Def);
+          if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) {
+            MO->setIsUndef(EraseImpDef);
+            MO->setIsDead(false);
+          }
+        // This value will reach instructions below, but we need to make sure
+        // the live range also reaches the instruction at Def.
+        if (!EraseImpDef)
+          EndPoints.push_back(Def);
       }
       DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
                    << ": " << Other.LI << '\n');
       break;
+    }
     case CR_Erase:
     case CR_Merge:
       if (isPrunedValue(i, Other)) {
@@ -1762,21 +1780,41 @@ void JoinVals::pruneValues(JoinVals &Other,
 void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
                            SmallVectorImpl<unsigned> &ShrinkRegs) {
   for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
-    if (Vals[i].Resolution != CR_Erase)
-      continue;
+    // Get the def location before markUnused() below invalidates it.
     SlotIndex Def = LI.getValNumInfo(i)->def;
-    MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
-    assert(MI && "No instruction to erase");
-    if (MI->isCopy()) {
-      unsigned Reg = MI->getOperand(1).getReg();
-      if (TargetRegisterInfo::isVirtualRegister(Reg) &&
-          Reg != CP.getSrcReg() && Reg != CP.getDstReg())
-        ShrinkRegs.push_back(Reg);
+    switch (Vals[i].Resolution) {
+    case CR_Keep:
+      // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
+      // longer. The IMPLICIT_DEF instructions are only inserted by
+      // PHIElimination to guarantee that all PHI predecessors have a value.
+      if (!Vals[i].IsImplicitDef || !Vals[i].Pruned)
+        break;
+      // Remove value number i from LI. Note that this VNInfo is still present
+      // in NewVNInfo, so it will appear as an unused value number in the final
+      // joined interval.
+      LI.getValNumInfo(i)->markUnused();
+      LI.removeValNo(LI.getValNumInfo(i));
+      DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n');
+      // FALL THROUGH.
+
+    case CR_Erase: {
+      MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
+      assert(MI && "No instruction to erase");
+      if (MI->isCopy()) {
+        unsigned Reg = MI->getOperand(1).getReg();
+        if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+            Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+          ShrinkRegs.push_back(Reg);
+      }
+      ErasedInstrs.insert(MI);
+      DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
+      LIS->RemoveMachineInstrFromMaps(MI);
+      MI->eraseFromParent();
+      break;
+    }
+    default:
+      break;
     }
-    ErasedInstrs.insert(MI);
-    DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
-    LIS->RemoveMachineInstrFromMaps(MI);
-    MI->eraseFromParent();
   }
 }
 
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index aa45a6861c..8dcbf83353 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -22,6 +22,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAGILP.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Target/TargetMachine.h"
@@ -30,6 +31,7 @@
 #include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -933,3 +935,94 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
 std::string ScheduleDAGInstrs::getDAGName() const {
   return "dag." + BB->getFullName();
 }
+
+namespace {
+/// \brief Manage the stack used by a reverse depth-first search over the DAG.
+class SchedDAGReverseDFS {
+  std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+public:
+  bool isComplete() const { return DFSStack.empty(); }
+
+  void follow(const SUnit *SU) {
+    DFSStack.push_back(std::make_pair(SU, SU->Preds.begin()));
+  }
+  void advance() { ++DFSStack.back().second; }
+
+  void backtrack() { DFSStack.pop_back(); }
+
+  const SUnit *getCurr() const { return DFSStack.back().first; }
+
+  SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
+
+  SUnit::const_pred_iterator getPredEnd() const {
+    return getCurr()->Preds.end();
+  }
+};
+} // anonymous
+
+void ScheduleDAGILP::resize(unsigned NumSUnits) {
+  ILPValues.resize(NumSUnits);
+}
+
+ILPValue ScheduleDAGILP::getILP(const SUnit *SU) {
+  return ILPValues[SU->NodeNum];
+}
+
+// A leaf node has an ILP of 1/1.
+static ILPValue initILP(const SUnit *SU) {
+  unsigned Cnt = SU->getInstr()->isTransient() ? 0 : 1;
+  return ILPValue(Cnt, 1 + SU->getDepth());
+}
+
+/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
+/// search from this root.
+void ScheduleDAGILP::computeILP(const SUnit *Root) {
+  if (!IsBottomUp)
+    llvm_unreachable("Top-down ILP metric is unimplemnted");
+
+  SchedDAGReverseDFS DFS;
+  // Mark a node visited by validating it.
+  ILPValues[Root->NodeNum] = initILP(Root);
+  DFS.follow(Root);
+  for (;;) {
+    // Traverse the leftmost path as far as possible.
+    while (DFS.getPred() != DFS.getPredEnd()) {
+      const SUnit *PredSU = DFS.getPred()->getSUnit();
+      DFS.advance();
+      // If the pred is already valid, skip it.
+      if (ILPValues[PredSU->NodeNum].isValid())
+        continue;
+      ILPValues[PredSU->NodeNum] = initILP(PredSU);
+      DFS.follow(PredSU);
+    }
+    // Visit the top of the stack in postorder and backtrack.
+    unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
+    DFS.backtrack();
+    if (DFS.isComplete())
+      break;
+    // Add the recently finished predecessor's bottom-up descendent count.
+    ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
+  }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void ILPValue::print(raw_ostream &OS) const {
+  if (!isValid())
+    OS << "BADILP";
+  OS << InstrCount << " / " << Cycles << " = "
+     << format("%g", ((double)InstrCount / Cycles));
+}
+
+void ILPValue::dump() const {
+  dbgs() << *this << '\n';
+}
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
+  Val.print(OS);
+  return OS;
+}
+
+} // namespace llvm
+#endif // !NDEBUG || LLVM_ENABLE_DUMP
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bb4f365b21..2ec129f730 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3153,6 +3153,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Tmp3 = Node->getOperand(1);
     if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) ||
         (isDivRemLibcallAvailable(Node, isSigned, TLI) &&
+         // If div is legal, it's better to do the normal expansion
+         !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) &&
          useDivRem(Node, isSigned, false))) {
       Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
     } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index bb54fd24e2..6bcb3b25e9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -124,6 +124,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
         // there are only two nodes left, i.e. Lo and Hi.
         SDValue LHS = Vals[Slot];
         SDValue RHS = Vals[Slot + 1];
+
+        if (TLI.isBigEndian())
+          std::swap(LHS, RHS);
+
         Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
                                    EVT::getIntegerVT(
                                      *DAG.getContext(),
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 79cfcdfe0e..183416f3fd 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3449,9 +3449,12 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
   EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
                                    IsZeroVal, MemcpyStrSrc,
                                    DAG.getMachineFunction());
+  Type *vtType = VT.isExtended() ? VT.getTypeForEVT(*DAG.getContext()) : NULL;
+  unsigned AS = (vtType && vtType->isPointerTy()) ?
+    cast<PointerType>(vtType)->getAddressSpace() : 0;
 
   if (VT == MVT::Other) {
-    if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment() ||
+    if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) ||
         TLI.allowsUnalignedMemoryAccesses(VT)) {
       VT = TLI.getPointerTy();
     } else {
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 6df4a0aa2a..4d30f04598 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -93,9 +93,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
                                                     Flags,
                                                     SectionKind::getDataRel(),
                                                     0, Label->getName());
-  unsigned Size = TM.getDataLayout()->getPointerSize();
+  unsigned Size = TM.getDataLayout()->getPointerSize(0);
   Streamer.SwitchSection(Sec);
-  Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment());
+  Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment(0));
   Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
   const MCExpr *E = MCConstantExpr::Create(Size, getContext());
   Streamer.EmitELFSize(Label, E);
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index c5c46815a2..94a2542e7a 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -17,6 +17,7 @@
 
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/ADT/SmallString.h"
@@ -267,7 +268,7 @@ public:
 void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
                        const std::vector<std::string> &InputArgv) {
   clear();  // Free the old contents.
-  unsigned PtrSize = EE->getDataLayout()->getPointerSize();
+  unsigned PtrSize = EE->getDataLayout()->getPointerSize(0);
   Array = new char[(InputArgv.size()+1)*PtrSize];
 
   DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n");
@@ -342,7 +343,7 @@ void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) {
 #ifndef NDEBUG
 /// isTargetNullPtr - Return whether the target pointer stored at Loc is null.
 static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) {
-  unsigned PtrSize = EE->getDataLayout()->getPointerSize();
+  unsigned PtrSize = EE->getDataLayout()->getPointerSize(0);
   for (unsigned i = 0; i < PtrSize; ++i)
     if (*(i + (uint8_t*)Loc))
       return false;
@@ -644,13 +645,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
     }
     case Instruction::PtrToInt: {
       GenericValue GV = getConstantValue(Op0);
-      uint32_t PtrWidth = TD->getPointerSizeInBits();
+      unsigned AS = cast<PointerType>(CE->getOperand(1)->getType())
+        ->getAddressSpace();
+      uint32_t PtrWidth = TD->getPointerSizeInBits(AS);
       GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal));
       return GV;
     }
     case Instruction::IntToPtr: {
       GenericValue GV = getConstantValue(Op0);
-      uint32_t PtrWidth = TD->getPointerSizeInBits();
+      unsigned AS = cast<PointerType>(CE->getType())->getAddressSpace();
+      uint32_t PtrWidth = TD->getPointerSizeInBits(AS);
       if (PtrWidth != GV.IntVal.getBitWidth())
         GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth);
       assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width");
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 5202b09165..326bf79c58 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -1054,7 +1054,8 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy,
   GenericValue Dest, Src = getOperandValue(SrcVal, SF);
   assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction");
 
-  uint32_t PtrSize = TD.getPointerSizeInBits();
+  unsigned AS = cast<PointerType>(DstTy)->getAddressSpace();
+  uint32_t PtrSize = TD.getPointerSizeInBits(AS);
   if (PtrSize != Src.IntVal.getBitWidth())
     Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize);
 
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index e3b90fdf78..e70efd0886 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -378,7 +378,7 @@ GenericValue lle_X_sprintf(FunctionType *FT,
       case 'x': case 'X':
         if (HowLong >= 1) {
           if (HowLong == 1 &&
-              TheInterpreter->getDataLayout()->getPointerSizeInBits() == 64 &&
+              TheInterpreter->getDataLayout()->getPointerSizeInBits(0) == 64 &&
               sizeof(long) < sizeof(int64_t)) {
             // Make sure we use %lld with a 64 bit argument because we might be
             // compiling LLI on a 32 bit compiler.
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 19c197903a..bcd5b26365 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -14,7 +14,9 @@
 
 #include "JIT.h"
 #include "JITDwarfEmitter.h"
+#include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -66,7 +68,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
 void
 JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
                                 const std::vector<MachineMove> &Moves) const {
-  unsigned PointerSize = TD->getPointerSize();
+  unsigned PointerSize = TD->getPointerSize(0);
   int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
           PointerSize : -PointerSize;
   MCSymbol *BaseLabel = 0;
@@ -378,7 +380,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
   for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
     SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
 
-  unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
+  unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(0);
 
   unsigned TypeOffset = sizeof(int8_t) + // Call site format
                         // Call-site table length
@@ -454,12 +456,12 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
     const GlobalVariable *GV = TypeInfos[M - 1];
 
     if (GV) {
-      if (TD->getPointerSize() == sizeof(int32_t))
+      if (TD->getPointerSize(GV->getType()->getAddressSpace()) == sizeof(int32_t))
         JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV));
       else
         JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV));
     } else {
-      if (TD->getPointerSize() == sizeof(int32_t))
+      if (TD->getPointerSize(0) == sizeof(int32_t))
         JCE->emitInt32(0);
       else
         JCE->emitInt64(0);
@@ -481,7 +483,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
 
 unsigned char*
 JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
-  unsigned PointerSize = TD->getPointerSize();
+  unsigned PointerSize = TD->getPointerSize(0);
   int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
           PointerSize : -PointerSize;
 
@@ -541,7 +543,7 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,
                              unsigned char* StartFunction,
                              unsigned char* EndFunction,
                              unsigned char* ExceptionTable) const {
-  unsigned PointerSize = TD->getPointerSize();
+  unsigned PointerSize = TD->getPointerSize(0);
 
   // EH frame header.
   unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue();
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index eb69693359..c1f8baed1a 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -378,17 +378,17 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID,
 
 void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE,
                                              uint64_t Value) {
-    // Ignore relocations for sections that were not loaded
-    if (Sections[RE.SectionID].Address != 0) {
-      uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset;
-      DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
-            << " + " << RE.Offset << " (" << format("%p", Target) << ")"
-            << " RelType: " << RE.RelType
-            << " Addend: " << RE.Addend
-            << "\n");
-
-      resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset,
-                        Value, RE.RelType, RE.Addend);
+  // Ignore relocations for sections that were not loaded
+  if (Sections[RE.SectionID].Address != 0) {
+    uint8_t *Target = Sections[RE.SectionID].Address + RE.Offset;
+    DEBUG(dbgs() << "\tSectionID: " << RE.SectionID
+          << " + " << RE.Offset << " (" << format("%p", Target) << ")"
+          << " RelType: " << RE.RelType
+          << " Addend: " << RE.Addend
+          << "\n");
+
+    resolveRelocation(Target, Sections[RE.SectionID].LoadAddress + RE.Offset,
+                      Value, RE.RelType, RE.Addend);
   }
 }
 
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 414957c356..08aba64e46 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -413,7 +413,13 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
           if (si == Obj.end_sections())
             llvm_unreachable("Symbol section not found, bad object file format!");
           DEBUG(dbgs() << "\t\tThis is section symbol\n");
-          Value.SectionID = findOrEmitSection(Obj, (*si), true, ObjSectionToID);
+          // Default to 'true' in case isText fails (though it never does).
+          bool isCode = true;
+          si->isText(isCode);
+          Value.SectionID = findOrEmitSection(Obj, 
+                                              (*si), 
+                                              isCode, 
+                                              ObjSectionToID);
           Value.Addend = Addend;
           break;
         }
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index d9fe36d9d8..16d1fff8a6 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -258,6 +258,7 @@ public:
   virtual void EmitPad(int64_t Offset);
   virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool);
 
+  virtual void EmitTCEntry(const MCSymbol &S);
 
   virtual void EmitInstruction(const MCInst &Inst);
 
@@ -1327,6 +1328,14 @@ void MCAsmStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
   EmitEOL();
 }
 
+void MCAsmStreamer::EmitTCEntry(const MCSymbol &S) {
+  OS << "\t.tc ";
+  OS << S.getName();
+  OS << "[TC],";
+  OS << S.getName();
+  EmitEOL();
+}
+
 void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
   assert(getCurrentSection() && "Cannot emit contents before setting section!");
 
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 1d28ec45cb..cc2c863871 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -103,6 +103,8 @@ public:
 
   virtual void EmitFileDirective(StringRef Filename);
 
+  virtual void EmitTCEntry(const MCSymbol &S);
+
   virtual void FinishImpl();
 
 private:
@@ -484,6 +486,12 @@ void MCELFStreamer::FinishImpl() {
   this->MCObjectStreamer::FinishImpl();
 }
 
+void MCELFStreamer::EmitTCEntry(const MCSymbol &S)
+{
+  // Creates a R_PPC64_TOC relocation
+  MCObjectStreamer::EmitSymbolValue(&S, 8, 0);
+}
+
 MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB,
                                     raw_ostream &OS, MCCodeEmitter *CE,
                                     bool RelaxAll, bool NoExecStack) {
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 9890e510c0..9313137677 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -133,6 +133,18 @@ private:
   /// IsDarwin - is Darwin compatibility enabled?
   bool IsDarwin;
 
+  /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
+  bool ParsingInlineAsm;
+
+  /// IsInstruction - Was the last parsed statement an instruction?
+  bool IsInstruction;
+
+  /// ParsedOperands - The parsed operands from the last parsed statement.
+  SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
+
+  /// Opcode - The opcode from the last parsed instruction.
+  unsigned Opcode;
+
 public:
   AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
             const MCAsmInfo &MAI);
@@ -171,6 +183,21 @@ public:
 
   virtual const AsmToken &Lex();
 
+  bool ParseStatement();
+  void setParsingInlineAsm(bool V) { ParsingInlineAsm = V; }
+  unsigned getNumParsedOperands() { return ParsedOperands.size(); }
+  MCParsedAsmOperand &getParsedOperand(unsigned OpNum) {
+    assert (ParsedOperands.size() > OpNum);
+    return *ParsedOperands[OpNum];
+  }
+  void freeParsedOperands() {
+    for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
+      delete ParsedOperands[i];
+    ParsedOperands.clear();
+  }
+  bool isInstruction() { return IsInstruction; }
+  unsigned getOpcode() { return Opcode; }
+
   bool ParseExpression(const MCExpr *&Res);
   virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
   virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
@@ -181,7 +208,6 @@ public:
 private:
   void CheckForValidSection();
 
-  bool ParseStatement();
   void EatToEndOfLine();
   bool ParseCppHashLineFilenameComment(const SMLoc &L);
 
@@ -419,7 +445,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx,
   : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
     GenericParser(new GenericAsmParser), PlatformParser(0),
     CurBuffer(0), MacrosEnabled(true), CppHashLineNumber(0),
-    AssemblerDialect(~0U), IsDarwin(false) {
+    AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false),
+    IsInstruction(false), Opcode(0) {
   // Save the old handler.
   SavedDiagHandler = SrcMgr.getDiagHandler();
   SavedDiagContext = SrcMgr.getDiagContext();
@@ -618,7 +645,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
 }
 
 void AsmParser::CheckForValidSection() {
-  if (!getStreamer().getCurrentSection()) {
+  if (!ParsingInlineAsm && !getStreamer().getCurrentSection()) {
     TokError("expected section directive before assembly directive");
     Out.SwitchSection(Ctx.getMachOSection(
                         "__TEXT", "__text",
@@ -1335,12 +1362,11 @@ bool AsmParser::ParseStatement() {
   CheckForValidSection();
 
   // Canonicalize the opcode to lower case.
-  SmallString<128> Opcode;
+  SmallString<128> OpcodeStr;
   for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
-    Opcode.push_back(tolower(IDVal[i]));
+    OpcodeStr.push_back(tolower(IDVal[i]));
 
-  SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
-  bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc,
+  bool HadError = getTargetParser().ParseInstruction(OpcodeStr.str(), IDLoc,
                                                      ParsedOperands);
 
   // Dump the parsed representation, if requested.
@@ -1371,13 +1397,18 @@ bool AsmParser::ParseStatement() {
   }
 
   // If parsing succeeded, match the instruction.
-  if (!HadError)
-    HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, ParsedOperands,
-                                                         Out);
-
-  // Free any parsed operands.
-  for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
-    delete ParsedOperands[i];
+  if (!HadError) {
+    unsigned ErrorInfo;
+    HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, Opcode,
+                                                         ParsedOperands, Out,
+                                                         ErrorInfo,
+                                                         ParsingInlineAsm);
+  }
+
+  // Free any parsed operands.  If parsing ms-style inline assembly it is the
+  // responsibility of the caller (i.e., clang) to free the parsed operands.
+  if (!ParsingInlineAsm)
+    freeParsedOperands();
 
   // Don't skip the rest of the line, the instruction parser is responsible for
   // that.
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 0bac24dc3a..afece0ba55 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -561,6 +561,10 @@ void MCStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool) {
   abort();
 }
 
+void MCStreamer::EmitTCEntry(const MCSymbol &S) {
+  llvm_unreachable("Unsupported method");
+}
+
 /// EmitRawText - If this file is backed by an assembly streamer, this dumps
 /// the specified string in the output .s file.  This capability is
 /// indicated by the hasRawTextSupport() predicate.
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index abfaecc279..2cc7a58462 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -98,6 +98,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) {
   case BGP: return "bgp";
   case BGQ: return "bgq";
   case Freescale: return "fsl";
+  case IBM: return "ibm";
   }
 
   llvm_unreachable("Invalid VendorType!");
@@ -128,6 +129,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
   case NativeClient: return "nacl";
   case CNK: return "cnk";
   case Bitrig: return "bitrig";
+  case AIX: return "aix";
   }
 
   llvm_unreachable("Invalid OSType");
@@ -278,6 +280,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
     .Case("bgp", Triple::BGP)
     .Case("bgq", Triple::BGQ)
     .Case("fsl", Triple::Freescale)
+    .Case("ibm", Triple::IBM)
     .Default(Triple::UnknownVendor);
 }
 
@@ -304,6 +307,7 @@ static Triple::OSType parseOS(StringRef OSName) {
     .StartsWith("nacl", Triple::NativeClient)
     .StartsWith("cnk", Triple::CNK)
     .StartsWith("bitrig", Triple::Bitrig)
+    .StartsWith("aix", Triple::AIX)
     .Default(Triple::UnknownOS);
 }
 
diff --git a/lib/Target/ARM/ARMELFWriterInfo.cpp b/lib/Target/ARM/ARMELFWriterInfo.cpp
index d88bf0c8fa..7bca0edf91 100644
--- a/lib/Target/ARM/ARMELFWriterInfo.cpp
+++ b/lib/Target/ARM/ARMELFWriterInfo.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM)
-  : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits() == 64,
+  : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits(0) == 64,
                         TM.getDataLayout()->isLittleEndian()) {
 }
 
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 3f82f8ce5a..a45f0c2b9e 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -132,6 +132,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
   setOperationAction(ISD::SELECT,            VT, Expand);
   setOperationAction(ISD::SELECT_CC,         VT, Expand);
+  setOperationAction(ISD::VSELECT,           VT, Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
   if (VT.isInteger()) {
     setOperationAction(ISD::SHL, VT, Custom);
@@ -1864,6 +1865,14 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
     }
   }
 
+  // If Caller's vararg or byval argument has been split between registers and
+  // stack, do not perform tail call, since part of the argument is in caller's
+  // local frame.
+  const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
+                                      getInfo<ARMFunctionInfo>();
+  if (AFI_Caller->getVarArgsRegSaveSize())
+    return false;
+
   // If the callee takes no arguments then go on to check the results of the
   // call.
   if (!Outs.empty()) {
@@ -4449,9 +4458,26 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
       // If we are VDUPing a value that comes directly from a vector, that will
       // cause an unnecessary move to and from a GPR, where instead we could
       // just use VDUPLANE.
-      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT)
-        N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+      if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+        // We need to create a new undef vector to use for the VDUPLANE if the
+        // size of the vector from which we get the value is different than the
+        // size of the vector that we need to create. We will insert the element
+        // such that the register coalescer will remove unnecessary copies.
+        if (VT != Value->getOperand(0).getValueType()) {
+          ConstantSDNode *constIndex;
+          constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
+          assert(constIndex && "The index is not a constant!");
+          unsigned index = constIndex->getAPIntValue().getLimitedValue() %
+                             VT.getVectorNumElements();
+          N =  DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+                 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
+                        Value, DAG.getConstant(index, MVT::i32)),
+                           DAG.getConstant(index, MVT::i32));
+        } else {
+          N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
                         Value->getOperand(0), Value->getOperand(1));
+        }
+      }
       else
         N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
 
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 93e5eca625..0eec8622e9 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -259,9 +259,10 @@ public:
 
   unsigned checkTargetMatchPredicate(MCInst &Inst);
 
-  bool MatchAndEmitInstruction(SMLoc IDLoc,
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                               MCStreamer &Out);
+                               MCStreamer &Out, unsigned &ErrorInfo,
+                               bool MatchingInlineAsm);
 };
 } // end anonymous namespace
 
@@ -7474,17 +7475,14 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
 
 static const char *getSubtargetFeatureName(unsigned Val);
 bool ARMAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                         SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                        MCStreamer &Out) {
+                        MCStreamer &Out, unsigned &ErrorInfo,
+                        bool MatchingInlineAsm) {
   MCInst Inst;
-  unsigned Kind;
-  unsigned ErrorInfo;
   unsigned MatchResult;
-  MatchInstMapAndConstraints MapAndConstraints;
-  MatchResult = MatchInstructionImpl(Operands, Kind, Inst,
-                                     MapAndConstraints, ErrorInfo,
-                                     /*matchingInlineAsm*/ false);
+  MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+                                     MatchingInlineAsm);
   switch (MatchResult) {
   default: break;
   case Match_Success:
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 096e2bc13b..5b17a0f927 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -509,9 +509,7 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
       HANDLE_ATTR(NonLazyBind);
 #undef HANDLE_ATTR
       if (attrs.hasAttribute(Attributes::StackAlignment))
-        Out << "B.addStackAlignmentAttr(Attribute::constructStackAlignmentFromInt("
-            << attrs.getStackAlignment()
-            << "))";
+        Out << "B.addStackAlignmentAttr(" << attrs.getStackAlignment() << ")";
       nl(Out);
       attrs.removeAttribute(Attributes::StackAlignment);
       assert(!attrs.hasAttributes() && "Unhandled attribute!");
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index d1e18b24c3..9e28a3d7d0 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -44,9 +44,10 @@ class MBlazeAsmParser : public MCTargetAsmParser {
 
   bool ParseDirectiveWord(unsigned Size, SMLoc L);
 
-  bool MatchAndEmitInstruction(SMLoc IDLoc,
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                               MCStreamer &Out);
+                               MCStreamer &Out, unsigned &ErrorInfo,
+                               bool MatchingInlineAsm);
 
   /// @name Auto-generated Match Functions
   /// {
@@ -312,15 +313,13 @@ static unsigned MatchRegisterName(StringRef Name);
 /// }
 //
 bool MBlazeAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                         SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                        MCStreamer &Out) {
+                        MCStreamer &Out, unsigned &ErrorInfo,
+                        bool MatchingInlineAsm) {
   MCInst Inst;
-  unsigned Kind;
-  unsigned ErrorInfo;
-  MatchInstMapAndConstraints MapAndConstraints;
-  switch (MatchInstructionImpl(Operands, Kind, Inst, MapAndConstraints,
-                               ErrorInfo, /*matchingInlineAsm*/ false)) {
+  switch (MatchInstructionImpl(Operands, Inst, ErrorInfo,
+                               MatchingInlineAsm)) {
   default: break;
   case Match_Success:
     Out.EmitInstruction(Inst);
diff --git a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
index 4ca30ba81f..6b575099e5 100644
--- a/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
 //===----------------------------------------------------------------------===//
 
 MBlazeELFWriterInfo::MBlazeELFWriterInfo(TargetMachine &TM)
-  : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits() == 64,
+  : TargetELFWriterInfo(TM.getDataLayout()->getPointerSizeInBits(0) == 64,
                         TM.getDataLayout()->isLittleEndian()) {
 }
 
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 91aaf940e6..1c2e3b2661 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -83,7 +83,7 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
 #undef GET_INTRINSIC_OVERLOAD_TABLE
 }
 
-/// This defines the "getAttributes(ID id)" method.
+/// This defines the "getAttributes(LLVMContext &C, ID id)" method.
 #define GET_INTRINSIC_ATTRIBUTES
 #include "MBlazeGenIntrinsics.inc"
 #undef GET_INTRINSIC_ATTRIBUTES
@@ -104,7 +104,8 @@ Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
                                                 Type **Tys,
                                                 unsigned numTy) const {
   assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
-  AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID);
+  AttrListPtr AList = getAttributes(M->getContext(),
+                                    (mblazeIntrinsic::ID) IntrID);
   return cast<Function>(M->getOrInsertFunction(getName(IntrID),
                                                getType(M->getContext(), IntrID),
                                                AList));
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index fc677aec38..113378a5f3 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -881,7 +881,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
 
   if (ReturnAddrIndex == 0) {
     // Set up a frame object for the return address.
-    uint64_t SlotSize = TD->getPointerSize();
+    uint64_t SlotSize = TD->getPointerSize(0);
     ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
                                                            true);
     FuncInfo->setRAIndex(ReturnAddrIndex);
@@ -901,7 +901,7 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
   if (Depth > 0) {
     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
     SDValue Offset =
-      DAG.getConstant(TD->getPointerSize(), MVT::i16);
+      DAG.getConstant(TD->getPointerSize(0), MVT::i16);
     return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                        DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                    FrameAddr, Offset),
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index c2980ffeea..00649d2f18 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -67,9 +67,10 @@ class MipsAsmParser : public MCTargetAsmParser {
 #define GET_ASSEMBLER_HEADER
 #include "MipsGenAsmMatcher.inc"
 
-  bool MatchAndEmitInstruction(SMLoc IDLoc,
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                               MCStreamer &Out);
+                               MCStreamer &Out, unsigned &ErrorInfo,
+                               bool MatchingInlineAsm);
 
   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
 
@@ -452,16 +453,13 @@ void MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc,
 }
 
 bool MipsAsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                         SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                        MCStreamer &Out) {
+                        MCStreamer &Out, unsigned &ErrorInfo,
+                        bool MatchingInlineAsm) {
   MCInst Inst;
-  unsigned Kind;
-  unsigned ErrorInfo;
-  MatchInstMapAndConstraints MapAndConstraints;
-  unsigned MatchResult = MatchInstructionImpl(Operands, Kind, Inst,
-                                              MapAndConstraints, ErrorInfo,
-                                              /*matchingInlineAsm*/ false);
+  unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+                                              MatchingInlineAsm);
 
   switch (MatchResult) {
   default: break;
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 127c5b89e8..8991433005 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -58,12 +58,22 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
-  unsigned Opc = 0, ZeroReg = 0;
+  unsigned Opc = 0;
+
+  if (Mips::CPU16RegsRegClass.contains(DestReg) &&
+      Mips::CPURegsRegClass.contains(SrcReg))
+    Opc = Mips::MoveR3216;
+  else if (Mips::CPURegsRegClass.contains(DestReg) &&
+           Mips::CPU16RegsRegClass.contains(SrcReg))
+    Opc = Mips::Move32R16;
+  else if ((SrcReg == Mips::HI) &&
+           (Mips::CPU16RegsRegClass.contains(DestReg)))
+    Opc = Mips::Mfhi16, SrcReg = 0;
+
+  else if ((SrcReg == Mips::LO) &&
+           (Mips::CPU16RegsRegClass.contains(DestReg)))
+    Opc = Mips::Mflo16, SrcReg = 0;
 
-  if (Mips::CPURegsRegClass.contains(DestReg)) { // Copy to CPU Reg.
-    if (Mips::CPURegsRegClass.contains(SrcReg))
-      Opc = Mips::Move32R16;
-  }
 
   assert(Opc && "Cannot copy registers");
 
@@ -72,9 +82,6 @@ void Mips16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   if (DestReg)
     MIB.addReg(DestReg, RegState::Define);
 
-  if (ZeroReg)
-    MIB.addReg(ZeroReg);
-
   if (SrcReg)
     MIB.addReg(SrcReg, getKillRegState(KillSrc));
 }
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index e1c90466fb..eba201a0ea 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -118,6 +118,14 @@ class FRR16_ins<bits<5> f, string asmstr, InstrItinClass itin> :
   FRR16<f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry),
         !strconcat(asmstr, "\t$rx, $ry"), [], itin> {
 }
+
+//
+// maybe refactor but need a $zero as a dummy first parameter
+//
+class FRR16_div_ins<bits<5> f, string asmstr, InstrItinClass itin> :
+  FRR16<f, (outs ), (ins CPU16Regs:$rx, CPU16Regs:$ry),
+        !strconcat(asmstr, "\t$$zero, $rx, $ry"), [], itin> ;
+
 class FRR16_M_ins<bits<5> f, string asmstr,
                   InstrItinClass itin> :
   FRR16<f, (outs CPU16Regs:$rx), (ins),
@@ -196,6 +204,24 @@ def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
 // To do a bitwise logical AND.
 
 def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
+//
+// Format: DIV rx, ry MIPS16e
+// Purpose: Divide Word
+// To divide 32-bit signed integers.
+//
+def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> {
+  let Defs = [HI, LO];
+}
+
+//
+// Format: DIVU rx, ry MIPS16e
+// Purpose: Divide Unsigned Word
+// To divide 32-bit unsigned integers.
+//
+def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> {
+  let Defs = [HI, LO];
+}
+
 
 //
 // Format: JR ra MIPS16e
@@ -551,5 +577,20 @@ def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>;
 // Small immediates
 def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
 
+//
+// MipsDivRem
+//
+def: Mips16Pat
+  <(MipsDivRem CPU16Regs:$rx, CPU16Regs:$ry),
+   (DivRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
+
+//
+// MipsDivRemU
+//
+def: Mips16Pat
+  <(MipsDivRemU CPU16Regs:$rx, CPU16Regs:$ry),
+   (DivuRxRy16 CPU16Regs:$rx, CPU16Regs:$ry)>;
+
+
 def: Mips16Pat<(add CPU16Regs:$hi, (MipsLo tglobaladdr:$lo)),
                (AddiuRxRxImmX16 CPU16Regs:$hi, tglobaladdr:$lo)>;
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index d3dfb35e26..c46094569e 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -126,8 +126,10 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
       return Base;
 
     // Truncate/sext the offset to the pointer size.
-    if (TD.getPointerSizeInBits() != 64) {
-      int SExtAmount = 64-TD.getPointerSizeInBits();
+    unsigned AS = PtrVal->getType()->isPointerTy() ?
+      cast<PointerType>(PtrVal->getType())->getAddressSpace() : 0;
+    if (TD.getPointerSizeInBits(AS) != 64) {
+      int SExtAmount = 64-TD.getPointerSizeInBits(AS);
       Offset = (Offset << SExtAmount) >> SExtAmount;
     }
 
@@ -1378,7 +1380,7 @@ getOpenCLAlignment(const DataLayout *TD,
 
   const FunctionType *FTy = dyn_cast<FunctionType>(Ty);
   if (FTy)
-    return TD->getPointerPrefAlignment();
+    return TD->getPointerPrefAlignment(0);
   return TD->getPrefTypeAlignment(Ty);
 }
 
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 914a9b0dce..d8abd9fba0 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -439,7 +439,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
 bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
   const DataLayout *TD = TM.getDataLayout();
 
-  bool isPPC64 = TD->getPointerSizeInBits() == 64;
+  bool isPPC64 = TD->getPointerSizeInBits(0) == 64;
 
   if (isPPC64 && !TOC.empty()) {
     const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
@@ -451,8 +451,8 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
     for (DenseMap<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
          E = TOC.end(); I != E; ++I) {
       OutStreamer.EmitLabel(I->second);
-      OutStreamer.EmitRawText("\t.tc " + Twine(I->first->getName()) +
-                              "[TC]," + I->first->getName());
+      MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
+      OutStreamer.EmitTCEntry(*S);
     }
   }
 
@@ -545,7 +545,7 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
 
 void PPCDarwinAsmPrinter::
 EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
-  bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+  bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits(0) == 64;
   
   const TargetLoweringObjectFileMachO &TLOFMacho = 
     static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
@@ -640,7 +640,7 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
 
 
 bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
-  bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+  bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits(0) == 64;
 
   // Darwin/PPC always uses mach-o.
   const TargetLoweringObjectFileMachO &TLOFMacho = 
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 459c3589d3..d123211473 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -498,7 +498,7 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
     } else if (CRSpillFrameIdx) {
       FrameIdx = CRSpillFrameIdx;
     } else {
-      MachineFrameInfo *MFI = ((MachineFunction &)MF).getFrameInfo();
+      MachineFrameInfo *MFI = (const_cast<MachineFunction &>(MF)).getFrameInfo();
       FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
       CRSpillFrameIdx = FrameIdx;
     }
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 219cbf1afc..393178a469 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -56,13 +56,21 @@ LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef TD) {
 }
 
 unsigned LLVMPointerSize(LLVMTargetDataRef TD) {
-  return unwrap(TD)->getPointerSize();
+  return unwrap(TD)->getPointerSize(0);
+}
+
+unsigned LLVMPointerSizeForAS(LLVMTargetDataRef TD, unsigned AS) {
+  return unwrap(TD)->getPointerSize(AS);
 }
 
 LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef TD) {
   return wrap(unwrap(TD)->getIntPtrType(getGlobalContext()));
 }
 
+LLVMTypeRef LLVMIntPtrTypeForAS(LLVMTargetDataRef TD, unsigned AS) {
+  return wrap(unwrap(TD)->getIntPtrType(getGlobalContext(), AS));
+}
+
 unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
   return unwrap(TD)->getTypeSizeInBits(unwrap(Ty));
 }
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index df34359a66..683d694909 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -40,8 +40,8 @@ private:
 
   bool Error(SMLoc L, const Twine &Msg,
              ArrayRef<SMRange> Ranges = ArrayRef<SMRange>(),
-             bool matchingInlineAsm = false) {
-    if (matchingInlineAsm) return true;
+             bool MatchingInlineAsm = false) {
+    if (MatchingInlineAsm) return true;
     return Parser.Error(L, Msg, Ranges);
   }
 
@@ -63,14 +63,10 @@ private:
   bool processInstruction(MCInst &Inst,
                           const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
 
-  bool MatchAndEmitInstruction(SMLoc IDLoc,
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                               MCStreamer &Out);
-  bool MatchInstruction(SMLoc IDLoc,
-                        SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                        MCStreamer &Out, unsigned &Kind, unsigned &Opcode,
-                        MatchInstMapAndConstraintsImpl &MapAndConstraints,
-                       unsigned &OrigErrorInfo, bool matchingInlineAsm = false);
+                               MCStreamer &Out, unsigned &ErrorInfo,
+                               bool MatchingInlineAsm);
 
   /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi)
   /// in 64bit mode or (%esi) or %es:(%esi) in 32bit mode.
@@ -756,6 +752,7 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
 
   const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
   if (getParser().ParseExpression(Disp, End)) return 0;
+  End = Parser.getTok().getLoc();
   return X86Operand::CreateMem(Disp, Start, End, Size);
 }
 
@@ -1520,24 +1517,10 @@ processInstruction(MCInst &Inst,
 }
 
 bool X86AsmParser::
-MatchAndEmitInstruction(SMLoc IDLoc,
+MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                         SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                        MCStreamer &Out) {
-  unsigned Kind;
-  unsigned Opcode;
-  unsigned ErrorInfo;
-  MatchInstMapAndConstraints MapAndConstraints;
-  bool Error = MatchInstruction(IDLoc, Operands, Out, Kind, Opcode,
-                                MapAndConstraints, ErrorInfo);
-  return Error;
-}
-
-bool X86AsmParser::
-MatchInstruction(SMLoc IDLoc,
-                 SmallVectorImpl<MCParsedAsmOperand*> &Operands,
-                 MCStreamer &Out, unsigned &Kind, unsigned &Opcode,
-  SmallVectorImpl<std::pair< unsigned, std::string > > &MapAndConstraints,
-                 unsigned &OrigErrorInfo, bool matchingInlineAsm) {
+                        MCStreamer &Out, unsigned &ErrorInfo,
+                        bool MatchingInlineAsm) {
   assert(!Operands.empty() && "Unexpect empty operand list!");
   X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
   assert(Op->isToken() && "Leading operand should always be a mnemonic!");
@@ -1554,7 +1537,7 @@ MatchInstruction(SMLoc IDLoc,
     MCInst Inst;
     Inst.setOpcode(X86::WAIT);
     Inst.setLoc(IDLoc);
-    if (!matchingInlineAsm)
+    if (!MatchingInlineAsm)
       Out.EmitInstruction(Inst);
 
     const char *Repl =
@@ -1577,26 +1560,26 @@ MatchInstruction(SMLoc IDLoc,
   MCInst Inst;
 
   // First, try a direct match.
-  switch (MatchInstructionImpl(Operands, Kind, Inst, MapAndConstraints,
-                               OrigErrorInfo, matchingInlineAsm,
+  switch (MatchInstructionImpl(Operands, Inst,
+                               ErrorInfo, MatchingInlineAsm,
                                isParsingIntelSyntax())) {
   default: break;
   case Match_Success:
     // Some instructions need post-processing to, for example, tweak which
     // encoding is selected. Loop on it while changes happen so the
     // individual transformations can chain off each other.
-    if (!matchingInlineAsm)
+    if (!MatchingInlineAsm)
       while (processInstruction(Inst, Operands))
         ;
 
     Inst.setLoc(IDLoc);
-    if (!matchingInlineAsm)
+    if (!MatchingInlineAsm)
       Out.EmitInstruction(Inst);
     Opcode = Inst.getOpcode();
     return false;
   case Match_MissingFeature:
     Error(IDLoc, "instruction requires a CPU feature not currently enabled",
-          EmptyRanges, matchingInlineAsm);
+          EmptyRanges, MatchingInlineAsm);
     return true;
   case Match_InvalidOperand:
     WasOriginallyInvalidOperand = true;
@@ -1629,24 +1612,18 @@ MatchInstruction(SMLoc IDLoc,
   Tmp[Base.size()] = Suffixes[0];
   unsigned ErrorInfoIgnore;
   unsigned Match1, Match2, Match3, Match4;
-  unsigned tKind;
 
-  MatchInstMapAndConstraints tMapAndConstraints[4];
-  Match1 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[0],
-                                ErrorInfoIgnore, isParsingIntelSyntax());
-  if (Match1 == Match_Success) Kind = tKind;
+  Match1 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+                                isParsingIntelSyntax());
   Tmp[Base.size()] = Suffixes[1];
-  Match2 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[1],
-                                ErrorInfoIgnore, isParsingIntelSyntax());
-  if (Match2 == Match_Success) Kind = tKind;
+  Match2 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+                                isParsingIntelSyntax());
   Tmp[Base.size()] = Suffixes[2];
-  Match3 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[2],
-                                ErrorInfoIgnore, isParsingIntelSyntax());
-  if (Match3 == Match_Success) Kind = tKind;
+  Match3 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+                                isParsingIntelSyntax());
   Tmp[Base.size()] = Suffixes[3];
-  Match4 = MatchInstructionImpl(Operands, tKind, Inst, tMapAndConstraints[3],
-                                ErrorInfoIgnore, isParsingIntelSyntax());
-  if (Match4 == Match_Success) Kind = tKind;
+  Match4 = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
+                                isParsingIntelSyntax());
 
   // Restore the old token.
   Op->setTokenValue(Base);
@@ -1659,10 +1636,9 @@ MatchInstruction(SMLoc IDLoc,
     (Match3 == Match_Success) + (Match4 == Match_Success);
   if (NumSuccessfulMatches == 1) {
     Inst.setLoc(IDLoc);
-    if (!matchingInlineAsm)
+    if (!MatchingInlineAsm)
       Out.EmitInstruction(Inst);
     Opcode = Inst.getOpcode();
-    // FIXME: Handle the map and constraints.
     return false;
   }
 
@@ -1689,7 +1665,7 @@ MatchInstruction(SMLoc IDLoc,
       OS << "'" << Base << MatchChars[i] << "'";
     }
     OS << ")";
-    Error(IDLoc, OS.str(), EmptyRanges, matchingInlineAsm);
+    Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
     return true;
   }
 
@@ -1700,28 +1676,28 @@ MatchInstruction(SMLoc IDLoc,
   if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) &&
       (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) {
     if (!WasOriginallyInvalidOperand) {
-      ArrayRef<SMRange> Ranges = matchingInlineAsm ? EmptyRanges :
+      ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges :
         Op->getLocRange();
       return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
-                   Ranges, matchingInlineAsm);
+                   Ranges, MatchingInlineAsm);
     }
 
     // Recover location info for the operand if we know which was the problem.
-    if (OrigErrorInfo != ~0U) {
-      if (OrigErrorInfo >= Operands.size())
+    if (ErrorInfo != ~0U) {
+      if (ErrorInfo >= Operands.size())
         return Error(IDLoc, "too few operands for instruction",
-                     EmptyRanges, matchingInlineAsm);
+                     EmptyRanges, MatchingInlineAsm);
 
-      X86Operand *Operand = (X86Operand*)Operands[OrigErrorInfo];
+      X86Operand *Operand = (X86Operand*)Operands[ErrorInfo];
       if (Operand->getStartLoc().isValid()) {
         SMRange OperandRange = Operand->getLocRange();
         return Error(Operand->getStartLoc(), "invalid operand for instruction",
-                     OperandRange, matchingInlineAsm);
+                     OperandRange, MatchingInlineAsm);
       }
     }
 
     return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
-                 matchingInlineAsm);
+                 MatchingInlineAsm);
   }
 
   // If one instruction matched with a missing feature, report this as a
@@ -1729,7 +1705,7 @@ MatchInstruction(SMLoc IDLoc,
   if ((Match1 == Match_MissingFeature) + (Match2 == Match_MissingFeature) +
       (Match3 == Match_MissingFeature) + (Match4 == Match_MissingFeature) == 1){
     Error(IDLoc, "instruction requires a CPU feature not currently enabled",
-          EmptyRanges, matchingInlineAsm);
+          EmptyRanges, MatchingInlineAsm);
     return true;
   }
 
@@ -1738,13 +1714,13 @@ MatchInstruction(SMLoc IDLoc,
   if ((Match1 == Match_InvalidOperand) + (Match2 == Match_InvalidOperand) +
       (Match3 == Match_InvalidOperand) + (Match4 == Match_InvalidOperand) == 1){
     Error(IDLoc, "invalid operand for instruction", EmptyRanges,
-          matchingInlineAsm);
+          MatchingInlineAsm);
     return true;
   }
 
   // If all of these were an outright failure, report it in a useless way.
   Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
-        EmptyRanges, matchingInlineAsm);
+        EmptyRanges, MatchingInlineAsm);
   return true;
 }
 
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 9bac1eb7c0..6d3cd2411a 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -308,7 +308,9 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
   };
 
   // This CPU doesnt support long nops. If needed add more.
-  if (CPU == "geode") {
+  // FIXME: Can we get this from the subtarget somehow?
+  if (CPU == "generic" || CPU == "i386" || CPU == "i486" || CPU == "i586" ||
+      CPU == "pentium" || CPU == "pentium-mmx" || CPU == "geode") {
     for (uint64_t i = 0; i < Count; ++i)
       OW->Write8(0x90);
     return true;
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 47ffd8b3ef..2037188c83 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -722,7 +722,7 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
       for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
         OutStreamer.EmitLabel(Stubs[i].first);
         OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
-                                    TD->getPointerSize(), 0);
+                                    TD->getPointerSize(0), 0);
       }
       Stubs.clear();
     }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index c0c3909030..0e1e05f449 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7732,7 +7732,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
       IDX = DAG.getLoad(getPointerTy(), dl, Chain, IDX, MachinePointerInfo(),
                         false, false, false, 0);
 
-    SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize()),
+    SDValue Scale = DAG.getConstant(Log2_64_Ceil(TD->getPointerSize(0)),
                                     getPointerTy());
     IDX = DAG.getNode(ISD::SHL, dl, getPointerTy(), IDX, Scale);
 
@@ -9259,6 +9259,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
     }
   }
 
+  // X86 doesn't have an i8 cmov. If both operands are the result of a truncate
+  // widen the cmov and push the truncate through. This avoids introducing a new
+  // branch during isel and doesn't add any extensions.
+  if (Op.getValueType() == MVT::i8 &&
+      Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
+    SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
+    if (T1.getValueType() == T2.getValueType() &&
+        // Blacklist CopyFromReg to avoid partial register stalls.
+        T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){
+      SDVTList VTs = DAG.getVTList(T1.getValueType(), MVT::Glue);
+      SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VTs, T2, T1, CC, Cond);
+      return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
+    }
+  }
+
   // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
   // condition is true.
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
@@ -9641,7 +9656,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
   //   fp_offset         (48 - 48 + 8 * 16)
   //   overflow_arg_area (point to parameters coming in memory).
   //   reg_save_area
-  unsigned PointerSize = TD->getPointerSize(); // @LOCALMOD
+  unsigned PointerSize = TD->getPointerSize(0); // @LOCALMOD
   SmallVector<SDValue, 8> MemOps;
   SDValue FIN = Op.getOperand(1);
   // Store gp_offset
@@ -12862,7 +12877,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(
       .addOperand(Base)
       .addOperand(Scale)
       .addOperand(Index)
-      .addDisp(Disp, 8+TD->getPointerSize()) // @LOCALMOD
+      .addDisp(Disp, 8+TD->getPointerSize(0)) // @LOCALMOD
       .addOperand(Segment)
       .setMemRefs(MMOBegin, MMOEnd);
 
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 6f6ff9ca2d..8a0274b5ff 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -518,8 +518,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   const AttrListPtr &PAL = F->getAttributes();
 
   // Add any return attributes.
-  if (Attributes attrs = PAL.getRetAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+  Attributes attrs = PAL.getRetAttributes();
+  if (attrs.hasAttributes())
+    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+                                                    attrs));
 
   // First, determine the new argument list
   unsigned ArgIndex = 1;
@@ -535,7 +537,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
     } else if (!ArgsToPromote.count(I)) {
       // Unchanged argument
       Params.push_back(I->getType());
-      if (Attributes attrs = PAL.getParamAttributes(ArgIndex))
+      Attributes attrs = PAL.getParamAttributes(ArgIndex);
+      if (attrs.hasAttributes())
         AttributesVec.push_back(AttributeWithIndex::get(Params.size(), attrs));
     } else if (I->use_empty()) {
       // Dead argument (which are always marked as promotable)
@@ -588,8 +591,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
   }
 
   // Add any function attributes.
-  if (Attributes attrs = PAL.getFnAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+  attrs = PAL.getFnAttributes();
+  if (attrs.hasAttributes())
+    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+                                                    attrs));
 
   Type *RetTy = FTy->getReturnType();
 
@@ -634,8 +639,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
     const AttrListPtr &CallPAL = CS.getAttributes();
 
     // Add any return attributes.
-    if (Attributes attrs = CallPAL.getRetAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(0, attrs));
+    Attributes attrs = CallPAL.getRetAttributes();
+    if (attrs.hasAttributes())
+      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+                                                      attrs));
 
     // Loop over the operands, inserting GEP and loads in the caller as
     // appropriate.
@@ -646,7 +653,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
       if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) {
         Args.push_back(*AI);          // Unmodified argument
 
-        if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+        Attributes Attrs = CallPAL.getParamAttributes(ArgIndex);
+        if (Attrs.hasAttributes())
           AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
 
       } else if (ByValArgsToTransform.count(I)) {
@@ -707,13 +715,16 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
     // Push any varargs arguments on the list.
     for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
       Args.push_back(*AI);
-      if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
+      Attributes Attrs = CallPAL.getParamAttributes(ArgIndex);
+      if (Attrs.hasAttributes())
         AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
     }
 
     // Add any function attributes.
-    if (Attributes attrs = CallPAL.getFnAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(~0, attrs));
+    attrs = CallPAL.getFnAttributes();
+    if (attrs.hasAttributes())
+      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+                                                      attrs));
 
     Instruction *New;
     if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index b107669b17..a7ff182dac 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -276,8 +276,10 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
       SmallVector<AttributeWithIndex, 8> AttributesVec;
       for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
         AttributesVec.push_back(PAL.getSlot(i));
-      if (Attributes FnAttrs = PAL.getFnAttributes())
-        AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+      Attributes FnAttrs = PAL.getFnAttributes();
+      if (FnAttrs.hasAttributes())
+        AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+                                                        FnAttrs));
       PAL = AttrListPtr::get(AttributesVec);
     }
 
@@ -762,13 +764,17 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   // here. Currently, this should not be possible, but special handling might be
   // required when new return value attributes are added.
   if (NRetTy->isVoidTy())
-    RAttrs &= ~Attributes::typeIncompatible(NRetTy);
+    RAttrs =
+      Attributes::get(NRetTy->getContext(), Attributes::Builder(RAttrs).
+                      removeAttributes(Attributes::typeIncompatible(NRetTy)));
   else
-    assert((RAttrs & Attributes::typeIncompatible(NRetTy)) == 0
-           && "Return attributes no longer compatible?");
+    assert(!Attributes::Builder(RAttrs).
+             hasAttributes(Attributes::typeIncompatible(NRetTy)) &&
+           "Return attributes no longer compatible?");
 
-  if (RAttrs)
-    AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+  if (RAttrs.hasAttributes())
+    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+                                                    RAttrs));
 
   // Remember which arguments are still alive.
   SmallVector<bool, 10> ArgAlive(FTy->getNumParams(), false);
@@ -785,7 +791,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
 
       // Get the original parameter attributes (skipping the first one, that is
       // for the return value.
-      if (Attributes Attrs = PAL.getParamAttributes(i + 1))
+      Attributes Attrs = PAL.getParamAttributes(i + 1);
+      if (Attrs.hasAttributes())
         AttributesVec.push_back(AttributeWithIndex::get(Params.size(), Attrs));
     } else {
       ++NumArgumentsEliminated;
@@ -795,7 +802,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
   }
 
   if (FnAttrs.hasAttributes())
-    AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+    AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+                                                    FnAttrs));
 
   // Reconstruct the AttributesList based on the vector we constructed.
   AttrListPtr NewPAL = AttrListPtr::get(AttributesVec);
@@ -831,9 +839,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
     Attributes RAttrs = CallPAL.getRetAttributes();
     Attributes FnAttrs = CallPAL.getFnAttributes();
     // Adjust in case the function was changed to return void.
-    RAttrs &= ~Attributes::typeIncompatible(NF->getReturnType());
-    if (RAttrs)
-      AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs));
+    RAttrs =
+      Attributes::get(NF->getContext(), Attributes::Builder(RAttrs).
+           removeAttributes(Attributes::typeIncompatible(NF->getReturnType())));
+    if (RAttrs.hasAttributes())
+      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+                                                      RAttrs));
 
     // Declare these outside of the loops, so we can reuse them for the second
     // loop, which loops the varargs.
@@ -845,19 +856,22 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
       if (ArgAlive[i]) {
         Args.push_back(*I);
         // Get original parameter attributes, but skip return attributes.
-        if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+        Attributes Attrs = CallPAL.getParamAttributes(i + 1);
+        if (Attrs.hasAttributes())
           AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
       }
 
     // Push any varargs arguments on the list. Don't forget their attributes.
     for (CallSite::arg_iterator E = CS.arg_end(); I != E; ++I, ++i) {
       Args.push_back(*I);
-      if (Attributes Attrs = CallPAL.getParamAttributes(i + 1))
+      Attributes Attrs = CallPAL.getParamAttributes(i + 1);
+      if (Attrs.hasAttributes())
         AttributesVec.push_back(AttributeWithIndex::get(Args.size(), Attrs));
     }
 
     if (FnAttrs.hasAttributes())
-      AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+      AttributesVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+                                                      FnAttrs));
 
     // Reconstruct the AttributesList based on the vector we constructed.
     AttrListPtr NewCallPAL = AttrListPtr::get(AttributesVec);
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 43e12d4444..9fe42f074a 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -215,12 +215,14 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
     Attributes::Builder B;
     B.addAttribute(Attributes::ReadOnly)
       .addAttribute(Attributes::ReadNone);
-    F->removeAttribute(~0, Attributes::get(B));
+    F->removeAttribute(AttrListPtr::FunctionIndex,
+                       Attributes::get(F->getContext(), B));
 
     // Add in the new attribute.
     B.clear();
     B.addAttribute(ReadsMemory ? Attributes::ReadOnly : Attributes::ReadNone);
-    F->addAttribute(~0, Attributes::get(B));
+    F->addAttribute(AttrListPtr::FunctionIndex,
+                    Attributes::get(F->getContext(), B));
 
     if (ReadsMemory)
       ++NumReadOnly;
@@ -379,7 +381,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
       for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end();
            A != E; ++A) {
         if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) {
-          A->addAttr(Attributes::get(B));
+          A->addAttr(Attributes::get(F->getContext(), B));
           ++NumNoCapture;
           Changed = true;
         }
@@ -394,7 +396,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
         if (!Tracker.Captured) {
           if (Tracker.Uses.empty()) {
             // If it's trivially not captured, mark it nocapture now.
-            A->addAttr(Attributes::get(B));
+            A->addAttr(Attributes::get(F->getContext(), B));
             ++NumNoCapture;
             Changed = true;
           } else {
@@ -427,7 +429,9 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
       // eg. "void f(int* x) { if (...) f(x); }"
       if (ArgumentSCC[0]->Uses.size() == 1 &&
           ArgumentSCC[0]->Uses[0] == ArgumentSCC[0]) {
-        ArgumentSCC[0]->Definition->addAttr(Attributes::get(B));
+        ArgumentSCC[0]->
+          Definition->
+          addAttr(Attributes::get(ArgumentSCC[0]->Definition->getContext(), B));
         ++NumNoCapture;
         Changed = true;
       }
@@ -469,7 +473,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const CallGraphSCC &SCC) {
 
     for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) {
       Argument *A = ArgumentSCC[i]->Definition;
-      A->addAttr(Attributes::get(B));
+      A->addAttr(Attributes::get(A->getContext(), B));
       ++NumNoCapture;
       Changed = true;
     }
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index a1b976577a..d4eeafa859 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2061,7 +2061,7 @@ static void ChangeCalleesToFastCall(Function *F) {
   }
 }
 
-static AttrListPtr StripNest(const AttrListPtr &Attrs) {
+static AttrListPtr StripNest(LLVMContext &C, const AttrListPtr &Attrs) {
   Attributes::Builder B;
   B.addAttribute(Attributes::Nest);
 
@@ -2070,19 +2070,19 @@ static AttrListPtr StripNest(const AttrListPtr &Attrs) {
       continue;
 
     // There can be only one.
-    return Attrs.removeAttr(Attrs.getSlot(i).Index, Attributes::get(B));
+    return Attrs.removeAttr(C, Attrs.getSlot(i).Index, Attributes::get(C, B));
   }
 
   return Attrs;
 }
 
 static void RemoveNestAttribute(Function *F) {
-  F->setAttributes(StripNest(F->getAttributes()));
+  F->setAttributes(StripNest(F->getContext(), F->getAttributes()));
   for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){
     if (isa<BlockAddress>(*UI))
       continue;
     CallSite User(cast<Instruction>(*UI));
-    User.setAttributes(StripNest(User.getAttributes()));
+    User.setAttributes(StripNest(F->getContext(), User.getAttributes()));
   }
 }
 
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 3e598abfcf..b5252aac84 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -146,7 +146,9 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
 
       Function *F = (*I)->getFunction();
       const AttrListPtr &PAL = F->getAttributes();
-      const AttrListPtr &NPAL = PAL.addAttr(~0, Attributes::get(NewAttributes));
+      const AttrListPtr &NPAL = PAL.addAttr(F->getContext(), ~0,
+                                            Attributes::get(F->getContext(),
+                                                            NewAttributes));
       if (PAL != NPAL) {
         MadeChange = true;
         F->setAttributes(NPAL);
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 325bb20fbe..41017c5287 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Support/InstVisitor.h"
 #include "llvm/Support/TargetFolder.h"
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
 
 namespace llvm {
   class CallSite;
@@ -74,6 +75,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   DataLayout *TD;
   TargetLibraryInfo *TLI;
   bool MadeIRChange;
+  LibCallSimplifier *Simplifier;
 public:
   /// Worklist - All of the instructions that need to be simplified.
   InstCombineWorklist Worklist;
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 626c17d212..ae70eb7949 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -778,39 +778,6 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
   return true;
 }
 
-namespace {
-class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls {
-  InstCombiner *IC;
-protected:
-  void replaceCall(Value *With) {
-    NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
-  }
-  bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
-    if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
-      return true;
-    if (ConstantInt *SizeCI =
-                           dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
-      if (SizeCI->isAllOnesValue())
-        return true;
-      if (isString) {
-        uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
-        // If the length is 0 we don't know how long it is and so we can't
-        // remove the check.
-        if (Len == 0) return false;
-        return SizeCI->getZExtValue() >= Len;
-      }
-      if (ConstantInt *Arg = dyn_cast<ConstantInt>(
-                                                  CI->getArgOperand(SizeArgOp)))
-        return SizeCI->getZExtValue() >= Arg->getZExtValue();
-    }
-    return false;
-  }
-public:
-  InstCombineFortifiedLibCalls(InstCombiner *IC) : IC(IC), NewInstruction(0) { }
-  Instruction *NewInstruction;
-};
-} // end anonymous namespace
-
 // Try to fold some different type of calls here.
 // Currently we're only working with the checking functions, memcpy_chk,
 // mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
@@ -818,9 +785,10 @@ public:
 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *TD) {
   if (CI->getCalledFunction() == 0) return 0;
 
-  InstCombineFortifiedLibCalls Simplifier(this);
-  Simplifier.fold(CI, TD, TLI);
-  return Simplifier.NewInstruction;
+  if (Value *With = Simplifier->optimizeCall(CI))
+    return ReplaceInstUsesWith(*CI, With);
+
+  return 0;
 }
 
 static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
@@ -1070,7 +1038,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
       return false;   // Cannot transform this parameter value.
 
     Attributes Attrs = CallerPAL.getParamAttributes(i + 1);
-    if (Attrs & Attributes::typeIncompatible(ParamTy))
+    if (Attributes::Builder(Attrs).
+          hasAttributes(Attributes::typeIncompatible(ParamTy)))
       return false;   // Attribute not compatible with transformed value.
 
     // If the parameter is passed as a byval argument, then we have to have a
@@ -1148,7 +1117,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 
   // Add the new return attributes.
   if (RAttrs.hasAttributes())
-    attrVec.push_back(AttributeWithIndex::get(0, Attributes::get(RAttrs)));
+    attrVec.push_back(
+      AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+                              Attributes::get(FT->getContext(), RAttrs)));
 
   AI = CS.arg_begin();
   for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
@@ -1162,7 +1133,8 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
     }
 
     // Add any parameter attributes.
-    if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+    Attributes PAttrs = CallerPAL.getParamAttributes(i + 1);
+    if (PAttrs.hasAttributes())
       attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
   }
 
@@ -1192,14 +1164,17 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
         }
 
         // Add any parameter attributes.
-        if (Attributes PAttrs = CallerPAL.getParamAttributes(i + 1))
+        Attributes PAttrs = CallerPAL.getParamAttributes(i + 1);
+        if (PAttrs.hasAttributes())
           attrVec.push_back(AttributeWithIndex::get(i + 1, PAttrs));
       }
     }
   }
 
-  if (Attributes FnAttrs =  CallerPAL.getFnAttributes())
-    attrVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
+  Attributes FnAttrs = CallerPAL.getFnAttributes();
+  if (FnAttrs.hasAttributes())
+    attrVec.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+                                              FnAttrs));
 
   if (NewRetTy->isVoidTy())
     Caller->setName("");   // Void type should not have a name.
@@ -1307,8 +1282,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
       // mean appending it.  Likewise for attributes.
 
       // Add any result attributes.
-      if (Attributes Attr = Attrs.getRetAttributes())
-        NewAttrs.push_back(AttributeWithIndex::get(0, Attr));
+      Attributes Attr = Attrs.getRetAttributes();
+      if (Attr.hasAttributes())
+        NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::ReturnIndex,
+                                                   Attr));
 
       {
         unsigned Idx = 1;
@@ -1328,7 +1305,8 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
 
           // Add the original argument and attributes.
           NewArgs.push_back(*I);
-          if (Attributes Attr = Attrs.getParamAttributes(Idx))
+          Attr = Attrs.getParamAttributes(Idx);
+          if (Attr.hasAttributes())
             NewAttrs.push_back
               (AttributeWithIndex::get(Idx + (Idx >= NestIdx), Attr));
 
@@ -1337,8 +1315,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
       }
 
       // Add any function attributes.
-      if (Attributes Attr = Attrs.getFnAttributes())
-        NewAttrs.push_back(AttributeWithIndex::get(~0, Attr));
+      Attr = Attrs.getFnAttributes();
+      if (Attr.hasAttributes())
+        NewAttrs.push_back(AttributeWithIndex::get(AttrListPtr::FunctionIndex,
+                                                   Attr));
 
       // The trampoline may have been bitcast to a bogus type (FTy).
       // Handle this by synthesizing a new function type, equal to FTy
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index b59210a9df..f3f3f8f585 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1293,15 +1293,16 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
   // If the source integer type is not the intptr_t type for this target, do a
   // trunc or zext to the intptr_t type, then inttoptr of it.  This allows the
   // cast to be exposed to other transforms.
+  unsigned AS = CI.getAddressSpace();
   if (TD) {
     if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
-        TD->getPointerSizeInBits()) {
+        TD->getPointerSizeInBits(AS)) {
       Value *P = Builder->CreateTrunc(CI.getOperand(0),
                                       TD->getIntPtrType(CI.getContext()));
       return new IntToPtrInst(P, CI.getType());
     }
     if (CI.getOperand(0)->getType()->getScalarSizeInBits() <
-        TD->getPointerSizeInBits()) {
+        TD->getPointerSizeInBits(AS)) {
       Value *P = Builder->CreateZExt(CI.getOperand(0),
                                      TD->getIntPtrType(CI.getContext()));
       return new IntToPtrInst(P, CI.getType());
@@ -1368,13 +1369,14 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
   // If the destination integer type is not the intptr_t type for this target,
   // do a ptrtoint to intptr_t then do a trunc or zext.  This allows the cast
   // to be exposed to other transforms.
+  unsigned AS = CI.getPointerAddressSpace();
   if (TD) {
-    if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
+    if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits(AS)) {
       Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
                                          TD->getIntPtrType(CI.getContext()));
       return new TruncInst(P, CI.getType());
     }
-    if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits()) {
+    if (CI.getType()->getScalarSizeInBits() > TD->getPointerSizeInBits(AS)) {
       Value *P = Builder->CreatePtrToInt(CI.getOperand(0),
                                          TD->getIntPtrType(CI.getContext()));
       return new ZExtInst(P, CI.getType());
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 4d5ffddc4c..e3e5ddae80 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -365,11 +365,12 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
   // order the state machines in complexity of the generated code.
   Value *Idx = GEP->getOperand(2);
 
+  unsigned AS = GEP->getPointerAddressSpace();
   // If the index is larger than the pointer size of the target, truncate the
   // index down like the GEP would do implicitly.  We don't have to do this for
   // an inbounds GEP because the index can't be out of range.
   if (!GEP->isInBounds() &&
-      Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits())
+      Idx->getType()->getPrimitiveSizeInBits() > TD->getPointerSizeInBits(AS))
     Idx = Builder->CreateTrunc(Idx, TD->getIntPtrType(Idx->getContext()));
 
   // If the comparison is only true for one or two elements, emit direct
@@ -528,10 +529,11 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
     }
   }
 
+  unsigned AS = cast<GetElementPtrInst>(GEP)->getPointerAddressSpace();
   // Okay, we know we have a single variable index, which must be a
   // pointer/array/vector index.  If there is no offset, life is simple, return
   // the index.
-  unsigned IntPtrWidth = TD.getPointerSizeInBits();
+  unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
   if (Offset == 0) {
     // Cast to intptrty in case a truncation occurs.  If an extension is needed,
     // we don't need to bother extending: the extension won't affect where the
@@ -1552,7 +1554,8 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
   // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
   // integer type is the same size as the pointer type.
   if (TD && LHSCI->getOpcode() == Instruction::PtrToInt &&
-      TD->getPointerSizeInBits() ==
+      TD->getPointerSizeInBits(
+        cast<PtrToIntInst>(LHSCI)->getPointerAddressSpace()) ==
          cast<IntegerType>(DestTy)->getBitWidth()) {
     Value *RHSOp = 0;
     if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index edfc060888..5356fdcba7 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2130,6 +2130,9 @@ bool InstCombiner::runOnFunction(Function &F) {
                InstCombineIRInserter(Worklist));
   Builder = &TheBuilder;
 
+  LibCallSimplifier TheSimplifier(TD, TLI);
+  Simplifier = &TheSimplifier;
+
   bool EverMadeChange = false;
 
   // Lower dbg.declare intrinsics otherwise their value may be clobbered
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 10ab9cb603..b566994edf 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -148,38 +148,29 @@ static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
                                cl::Hidden, cl::init(-1));
 
 namespace {
-
-/// An object of this type is created while instrumenting every function.
-struct AsanFunctionContext {
-  AsanFunctionContext(Function &Function) : F(Function) { }
-
-  Function &F;
-};
-
 /// AddressSanitizer: instrument the code in module to find memory bugs.
-struct AddressSanitizer : public ModulePass {
+struct AddressSanitizer : public FunctionPass {
   AddressSanitizer();
   virtual const char *getPassName() const;
-  void instrumentMop(AsanFunctionContext &AFC, Instruction *I);
-  void instrumentAddress(AsanFunctionContext &AFC,
-                         Instruction *OrigIns, IRBuilder<> &IRB,
+  void instrumentMop(Instruction *I);
+  void instrumentAddress(Instruction *OrigIns, IRBuilder<> &IRB,
                          Value *Addr, uint32_t TypeSize, bool IsWrite);
   Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
                            Value *ShadowValue, uint32_t TypeSize);
   Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
                                  bool IsWrite, size_t AccessSizeIndex);
-  bool instrumentMemIntrinsic(AsanFunctionContext &AFC, MemIntrinsic *MI);
-  void instrumentMemIntrinsicParam(AsanFunctionContext &AFC,
-                                   Instruction *OrigIns, Value *Addr,
+  bool instrumentMemIntrinsic(MemIntrinsic *MI);
+  void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
                                    Value *Size,
                                    Instruction *InsertBefore, bool IsWrite);
   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
-  bool handleFunction(Module &M, Function &F);
+  bool runOnFunction(Function &F);
   void createInitializerPoisonCalls(Module &M,
                                     Value *FirstAddr, Value *LastAddr);
   bool maybeInsertAsanInitAtFunctionEntry(Function &F);
-  bool poisonStackInFunction(Module &M, Function &F);
-  virtual bool runOnModule(Module &M);
+  bool poisonStackInFunction(Function &F);
+  virtual bool doInitialization(Module &M);
+  virtual bool doFinalization(Module &M);
   bool insertGlobalRedzones(Module &M);
   static char ID;  // Pass identification, replacement for typeid
 
@@ -216,6 +207,8 @@ struct AddressSanitizer : public ModulePass {
   Type *IntptrPtrTy;
   Function *AsanCtorFunction;
   Function *AsanInitFunction;
+  Function *AsanStackMallocFunc, *AsanStackFreeFunc;
+  Function *AsanHandleNoReturnFunc;
   Instruction *CtorInsertBefore;
   OwningPtr<BlackList> BL;
   // This array is indexed by AccessIsWrite and log2(AccessSize).
@@ -230,8 +223,8 @@ char AddressSanitizer::ID = 0;
 INITIALIZE_PASS(AddressSanitizer, "asan",
     "AddressSanitizer: detects use-after-free and out-of-bounds bugs.",
     false, false)
-AddressSanitizer::AddressSanitizer() : ModulePass(ID) { }
-ModulePass *llvm::createAddressSanitizerPass() {
+AddressSanitizer::AddressSanitizer() : FunctionPass(ID) { }
+FunctionPass *llvm::createAddressSanitizerPass() {
   return new AddressSanitizer();
 }
 
@@ -295,12 +288,12 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
 }
 
 void AddressSanitizer::instrumentMemIntrinsicParam(
-    AsanFunctionContext &AFC, Instruction *OrigIns,
+    Instruction *OrigIns,
     Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
   // Check the first byte.
   {
     IRBuilder<> IRB(InsertBefore);
-    instrumentAddress(AFC, OrigIns, IRB, Addr, 8, IsWrite);
+    instrumentAddress(OrigIns, IRB, Addr, 8, IsWrite);
   }
   // Check the last byte.
   {
@@ -310,13 +303,12 @@ void AddressSanitizer::instrumentMemIntrinsicParam(
     SizeMinusOne = IRB.CreateIntCast(SizeMinusOne, IntptrTy, false);
     Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
     Value *AddrPlusSizeMinisOne = IRB.CreateAdd(AddrLong, SizeMinusOne);
-    instrumentAddress(AFC, OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);
+    instrumentAddress(OrigIns, IRB, AddrPlusSizeMinisOne, 8, IsWrite);
   }
 }
 
 // Instrument memset/memmove/memcpy
-bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
-                                              MemIntrinsic *MI) {
+bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
   Value *Dst = MI->getDest();
   MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
   Value *Src = MemTran ? MemTran->getSource() : 0;
@@ -335,9 +327,9 @@ bool AddressSanitizer::instrumentMemIntrinsic(AsanFunctionContext &AFC,
     InsertBefore = splitBlockAndInsertIfThen(Cmp, false);
   }
 
-  instrumentMemIntrinsicParam(AFC, MI, Dst, Length, InsertBefore, true);
+  instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
   if (Src)
-    instrumentMemIntrinsicParam(AFC, MI, Src, Length, InsertBefore, false);
+    instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
   return true;
 }
 
@@ -391,7 +383,7 @@ bool AddressSanitizer::HasDynamicInitializer(GlobalVariable *G) {
   return DynamicallyInitializedGlobals.count(G);
 }
 
-void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
+void AddressSanitizer::instrumentMop(Instruction *I) {
   bool IsWrite = false;
   Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
   assert(Addr);
@@ -424,7 +416,7 @@ void AddressSanitizer::instrumentMop(AsanFunctionContext &AFC, Instruction *I) {
   }
 
   IRBuilder<> IRB(I);
-  instrumentAddress(AFC, I, IRB, Addr, TypeSize, IsWrite);
+  instrumentAddress(I, IRB, Addr, TypeSize, IsWrite);
 }
 
 // Validate the result of Module::getOrInsertFunction called for an interface
@@ -469,8 +461,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
   return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
 }
 
-void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
-                                         Instruction *OrigIns,
+void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
                                          IRBuilder<> &IRB, Value *Addr,
                                          uint32_t TypeSize, bool IsWrite) {
   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
@@ -494,7 +485,8 @@ void AddressSanitizer::instrumentAddress(AsanFunctionContext &AFC,
     BasicBlock *NextBB = CheckTerm->getSuccessor(0);
     IRB.SetInsertPoint(CheckTerm);
     Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
-    BasicBlock *CrashBlock = BasicBlock::Create(*C, "", &AFC.F, NextBB);
+    BasicBlock *CrashBlock =
+        BasicBlock::Create(*C, "", NextBB->getParent(), NextBB);
     CrashTerm = new UnreachableInst(*C, CrashBlock);
     BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
     ReplaceInstWithInst(CheckTerm, NewTerm);
@@ -734,15 +726,16 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
 }
 
 // virtual
-bool AddressSanitizer::runOnModule(Module &M) {
+bool AddressSanitizer::doInitialization(Module &M) {
   // Initialize the private fields. No one has accessed them before.
   TD = getAnalysisIfAvailable<DataLayout>();
+
   if (!TD)
     return false;
   BL.reset(new BlackList(ClBlackListFile));
 
   C = &(M.getContext());
-  LongSize = TD->getPointerSizeInBits();
+  LongSize = TD->getPointerSizeInBits(0);
   IntptrTy = Type::getIntNTy(*C, LongSize);
   IntptrPtrTy = PointerType::get(IntptrTy, 0);
 
@@ -771,6 +764,15 @@ bool AddressSanitizer::runOnModule(Module &M) {
           M.getOrInsertFunction(FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
     }
   }
+
+  AsanStackMallocFunc = checkInterfaceFunction(M.getOrInsertFunction(
+      kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL));
+  AsanStackFreeFunc = checkInterfaceFunction(M.getOrInsertFunction(
+      kAsanStackFreeName, IRB.getVoidTy(),
+      IntptrTy, IntptrTy, IntptrTy, NULL));
+  AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
+      kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
+
   // We insert an empty inline asm after __asan_report* to avoid callback merge.
   EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
                             StringRef(""), StringRef(""),
@@ -797,10 +799,6 @@ bool AddressSanitizer::runOnModule(Module &M) {
   // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
   RedzoneSize = std::max(32, (int)(1 << MappingScale));
 
-  bool Res = false;
-
-  if (ClGlobals)
-    Res |= insertGlobalRedzones(M);
 
   if (ClMappingOffsetLog >= 0) {
     // Tell the run-time the current values of mapping offset and scale.
@@ -820,17 +818,20 @@ bool AddressSanitizer::runOnModule(Module &M) {
     IRB.CreateLoad(asan_mapping_scale, true);
   }
 
-
-  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
-    if (F->isDeclaration()) continue;
-    Res |= handleFunction(M, *F);
-  }
-
   appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority);
 
-  return Res;
+  return true;
+}
+
+bool AddressSanitizer::doFinalization(Module &M) {
+  // We transform the globals at the very end so that the optimization analysis
+  // works on the original globals.
+  if (ClGlobals)
+    return insertGlobalRedzones(M);
+  return false;
 }
 
+
 bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
   // For each NSObject descendant having a +load method, this method is invoked
   // by the ObjC runtime before any of the static constructors is called.
@@ -847,7 +848,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
   return false;
 }
 
-bool AddressSanitizer::handleFunction(Module &M, Function &F) {
+bool AddressSanitizer::runOnFunction(Function &F) {
   if (BL->isIn(F)) return false;
   if (&F == AsanCtorFunction) return false;
 
@@ -899,8 +900,6 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
     }
   }
 
-  AsanFunctionContext AFC(F);
-
   // Instrument.
   int NumInstrumented = 0;
   for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
@@ -908,24 +907,23 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) {
     if (ClDebugMin < 0 || ClDebugMax < 0 ||
         (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
       if (isInterestingMemoryAccess(Inst, &IsWrite))
-        instrumentMop(AFC, Inst);
+        instrumentMop(Inst);
       else
-        instrumentMemIntrinsic(AFC, cast<MemIntrinsic>(Inst));
+        instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
     }
     NumInstrumented++;
   }
 
   DEBUG(dbgs() << F);
 
-  bool ChangedStack = poisonStackInFunction(M, F);
+  bool ChangedStack = poisonStackInFunction(F);
 
   // We must unpoison the stack before every NoReturn call (throw, _exit, etc).
   // See e.g. http://code.google.com/p/address-sanitizer/issues/detail?id=37
   for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) {
     Instruction *CI = NoReturnCalls[i];
     IRBuilder<> IRB(CI);
-    IRB.CreateCall(M.getOrInsertFunction(kAsanHandleNoReturnName,
-                                         IRB.getVoidTy(), NULL));
+    IRB.CreateCall(AsanHandleNoReturnFunc);
   }
 
   return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
@@ -1039,7 +1037,7 @@ bool AddressSanitizer::LooksLikeCodeInBug11395(Instruction *I) {
 // compiler hoists the load of the shadow value somewhere too high.
 // This causes asan to report a non-existing bug on 453.povray.
 // It sounds like an LLVM bug.
-bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
+bool AddressSanitizer::poisonStackInFunction(Function &F) {
   if (!ClStack) return false;
   SmallVector<AllocaInst*, 16> AllocaVec;
   SmallVector<Instruction*, 8> RetVec;
@@ -1089,8 +1087,6 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
   Value *LocalStackBase = OrigStackBase;
 
   if (DoStackMalloc) {
-    Value *AsanStackMallocFunc = M.getOrInsertFunction(
-        kAsanStackMallocName, IntptrTy, IntptrTy, IntptrTy, NULL);
     LocalStackBase = IRB.CreateCall2(AsanStackMallocFunc,
         ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
   }
@@ -1126,7 +1122,7 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
                                    ConstantInt::get(IntptrTy, LongSize/8));
   BasePlus1 = IRB.CreateIntToPtr(BasePlus1, IntptrPtrTy);
   Value *Description = IRB.CreatePointerCast(
-      createPrivateGlobalForString(M, StackDescription.str()),
+      createPrivateGlobalForString(*F.getParent(), StackDescription.str()),
       IntptrTy);
   IRB.CreateStore(Description, BasePlus1);
 
@@ -1134,13 +1130,6 @@ bool AddressSanitizer::poisonStackInFunction(Module &M, Function &F) {
   Value *ShadowBase = memToShadow(LocalStackBase, IRB);
   PoisonStack(ArrayRef<AllocaInst*>(AllocaVec), IRB, ShadowBase, true);
 
-  Value *AsanStackFreeFunc = NULL;
-  if (DoStackMalloc) {
-    AsanStackFreeFunc = M.getOrInsertFunction(
-        kAsanStackFreeName, IRB.getVoidTy(),
-        IntptrTy, IntptrTy, IntptrTy, NULL);
-  }
-
   // Unpoison the stack before all ret instructions.
   for (size_t i = 0, n = RetVec.size(); i < n; i++) {
     Instruction *Ret = RetVec[i];
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 4d31444b76..665d5b0171 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -774,8 +774,10 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
     // Conservatively require the attributes of the call to match those of the
     // return. Ignore noalias because it doesn't affect the call sequence.
     Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
-    if (Attributes::Builder(CalleeRetAttr ^ CallerRetAttr)
-        .removeAttribute(Attributes::NoAlias).hasAttributes())
+    if (Attributes::Builder(CalleeRetAttr).
+          removeAttribute(Attributes::NoAlias) !=
+        Attributes::Builder(CallerRetAttr).
+          removeAttribute(Attributes::NoAlias))
       continue;
 
     // Make sure the call instruction is followed by an unconditional branch to
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 602e5a4785..736cc05e04 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -701,6 +701,22 @@ bool DSE::HandleFree(CallInst *F) {
   return MadeChange;
 }
 
+namespace {
+  struct CouldRef {
+    typedef Value *argument_type;
+    const CallSite CS;
+    AliasAnalysis *AA;
+
+    bool operator()(Value *I) {
+      // See if the call site touches the value.
+      AliasAnalysis::ModRefResult A =
+        AA->getModRefInfo(CS, I, getPointerSize(I, *AA));
+
+      return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref;
+    }
+  };
+}
+
 /// handleEndBlock - Remove dead stores to stack-allocated locations in the
 /// function end block.  Ex:
 /// %A = alloca i32
@@ -802,26 +818,14 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
 
       // If the call might load from any of our allocas, then any store above
       // the call is live.
-      SmallVector<Value*, 8> LiveAllocas;
-      for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
-           E = DeadStackObjects.end(); I != E; ++I) {
-        // See if the call site touches it.
-        AliasAnalysis::ModRefResult A =
-          AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA));
-
-        if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
-          LiveAllocas.push_back(*I);
-      }
+      CouldRef Pred = { CS, AA };
+      DeadStackObjects.remove_if(Pred);
 
       // If all of the allocas were clobbered by the call then we're not going
       // to find anything else to process.
-      if (DeadStackObjects.size() == LiveAllocas.size())
+      if (DeadStackObjects.empty())
         break;
 
-      for (SmallVector<Value*, 8>::iterator I = LiveAllocas.begin(),
-           E = LiveAllocas.end(); I != E; ++I)
-        DeadStackObjects.remove(*I);
-
       continue;
     }
 
@@ -858,6 +862,20 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
   return MadeChange;
 }
 
+namespace {
+  struct CouldAlias {
+    typedef Value *argument_type;
+    const AliasAnalysis::Location &LoadedLoc;
+    AliasAnalysis *AA;
+
+    bool operator()(Value *I) {
+      // See if the loaded location could alias the stack location.
+      AliasAnalysis::Location StackLoc(I, getPointerSize(I, *AA));
+      return !AA->isNoAlias(StackLoc, LoadedLoc);
+    }
+  };
+}
+
 /// RemoveAccessedObjects - Check to see if the specified location may alias any
 /// of the stack objects in the DeadStackObjects set.  If so, they become live
 /// because the location is being loaded.
@@ -876,16 +894,7 @@ void DSE::RemoveAccessedObjects(const AliasAnalysis::Location &LoadedLoc,
     return;
   }
 
-  SmallVector<Value*, 16> NowLive;
-  for (SmallSetVector<Value*, 16>::iterator I = DeadStackObjects.begin(),
-       E = DeadStackObjects.end(); I != E; ++I) {
-    // See if the loaded location could alias the stack location.
-    AliasAnalysis::Location StackLoc(*I, getPointerSize(*I, *AA));
-    if (!AA->isNoAlias(StackLoc, LoadedLoc))
-      NowLive.push_back(*I);
-  }
-
-  for (SmallVector<Value*, 16>::iterator I = NowLive.begin(), E = NowLive.end();
-       I != E; ++I)
-    DeadStackObjects.remove(*I);
+  // Remove objects that could alias LoadedLoc.
+  CouldAlias Pred = { LoadedLoc, AA };
+  DeadStackObjects.remove_if(Pred);
 }
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 517657cf52..97fff9edd6 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -174,10 +174,11 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const {
   // this width can be stored.  If so, check to see whether we will end up
   // actually reducing the number of stores used.
   unsigned Bytes = unsigned(End-Start);
-  unsigned NumPointerStores = Bytes/TD.getPointerSize();
+  unsigned AS = cast<StoreInst>(TheStores[0])->getPointerAddressSpace();
+  unsigned NumPointerStores = Bytes/TD.getPointerSize(AS);
 
   // Assume the remaining bytes if any are done a byte at a time.
-  unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize();
+  unsigned NumByteStores = Bytes - NumPointerStores*TD.getPointerSize(AS);
 
   // If we will reduce the # stores (according to this heuristic), do the
   // transformation.  This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
diff --git a/lib/Transforms/Scalar/NaClCcRewrite.cpp b/lib/Transforms/Scalar/NaClCcRewrite.cpp
index 477e131d37..21600f015a 100644
--- a/lib/Transforms/Scalar/NaClCcRewrite.cpp
+++ b/lib/Transforms/Scalar/NaClCcRewrite.cpp
@@ -543,7 +543,7 @@ void FixFunctionByvalsParameter(Function& F,
     v = new StoreInst(arg, v, before);
 
     new_arguments.push_back(arg);
-    new_attributes.push_back(Attributes(Attributes::None));
+    new_attributes.push_back(Attributes());
   }
 }
 
@@ -585,7 +585,7 @@ void UpdateFunctionSignature(Function &F,
   // Update function attributes
   for (size_t i = 0; i < new_attributes.size(); ++i) {
     Attributes attr = new_attributes[i];
-    if (attr) {
+    if (attr.hasAttributes()) {
       // index 0 is for the return value
       F.addAttribute(i + 1, attr);
     }
@@ -752,7 +752,7 @@ void PrependCompensationForByvals(std::vector<Value*>& new_operands,
     v = new LoadInst(v, "byval_extract", call);
 
     new_operands.push_back(v);
-    new_attributes.push_back(Attributes(Attributes::None));
+    new_attributes.push_back(Attributes());
   }
 }
 
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
index 629f9d2ff5..017df8f1a4 100644
--- a/lib/Transforms/Scalar/ObjCARC.cpp
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -1790,7 +1790,9 @@ Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
     FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
     Attributes::Builder B;
     B.addAttribute(Attributes::NoUnwind);
-    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+    AttrListPtr Attributes =
+      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+                            Attributes::get(M->getContext(), B));
     RetainRVCallee =
       M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
                              Attributes);
@@ -1806,7 +1808,9 @@ Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
     FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
     Attributes::Builder B;
     B.addAttribute(Attributes::NoUnwind);
-    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+    AttrListPtr Attributes =
+      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+                            Attributes::get(C, B));
     AutoreleaseRVCallee =
       M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
                              Attributes);
@@ -1820,7 +1824,9 @@ Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
     Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
     Attributes::Builder B;
     B.addAttribute(Attributes::NoUnwind);
-    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+    AttrListPtr Attributes =
+      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+                            Attributes::get(C, B));
     ReleaseCallee =
       M->getOrInsertFunction(
         "objc_release",
@@ -1836,7 +1842,9 @@ Constant *ObjCARCOpt::getRetainCallee(Module *M) {
     Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
     Attributes::Builder B;
     B.addAttribute(Attributes::NoUnwind);
-    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+    AttrListPtr Attributes =
+      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+                            Attributes::get(C, B));
     RetainCallee =
       M->getOrInsertFunction(
         "objc_retain",
@@ -1867,7 +1875,9 @@ Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
     Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) };
     Attributes::Builder B;
     B.addAttribute(Attributes::NoUnwind);
-    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+    AttrListPtr Attributes =
+      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+                            Attributes::get(C, B));
     AutoreleaseCallee =
       M->getOrInsertFunction(
         "objc_autorelease",
@@ -3845,8 +3855,9 @@ Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
     Attributes::Builder BNoCapture;
     BNoCapture.addAttribute(Attributes::NoCapture);
     AttrListPtr Attributes = AttrListPtr()
-      .addAttr(~0u, Attributes::get(BNoUnwind))
-      .addAttr(1, Attributes::get(BNoCapture));
+      .addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+               Attributes::get(C, BNoUnwind))
+      .addAttr(M->getContext(), 1, Attributes::get(C, BNoCapture));
 
     StoreStrongCallee =
       M->getOrInsertFunction(
@@ -3865,7 +3876,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
     FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
     Attributes::Builder B;
     B.addAttribute(Attributes::NoUnwind);
-    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+    AttrListPtr Attributes =
+      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+                            Attributes::get(C, B));
     RetainAutoreleaseCallee =
       M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
   }
@@ -3880,7 +3893,9 @@ Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
     FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false);
     Attributes::Builder B;
     B.addAttribute(Attributes::NoUnwind);
-    AttrListPtr Attributes = AttrListPtr().addAttr(~0u, Attributes::get(B));
+    AttrListPtr Attributes =
+      AttrListPtr().addAttr(M->getContext(), AttrListPtr::FunctionIndex,
+                            Attributes::get(C, B));
     RetainAutoreleaseRVCallee =
       M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
                              Attributes);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index ca76251492..3e84a91c1d 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -447,6 +447,7 @@ protected:
 
   bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) {
     GEPOffset = Offset;
+    unsigned int AS = GEPI.getPointerAddressSpace();
     for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI);
          GTI != GTE; ++GTI) {
       ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
@@ -476,7 +477,7 @@ protected:
         continue;
       }
 
-      APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits());
+      APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits(AS));
       Index *= APInt(Index.getBitWidth(),
                      TD.getTypeAllocSize(GTI.getIndexedType()));
       Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset,
@@ -1784,7 +1785,9 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
       break;
     if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
       ElementTy = SeqTy->getElementType();
-      Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(), 0)));
+      Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(
+                ElementTy->isPointerTy() ? 
+                cast<PointerType>(ElementTy)->getAddressSpace(): 0), 0)));
     } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
       if (STy->element_begin() == STy->element_end())
         break; // Nothing left to descend into.
@@ -2004,6 +2007,51 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD,
   return Ptr;
 }
 
+/// \brief Test whether we can convert a value from the old to the new type.
+///
+/// This predicate should be used to guard calls to convertValue in order to
+/// ensure that we only try to convert viable values. The strategy is that we
+/// will peel off single element struct and array wrappings to get to an
+/// underlying value, and convert that value.
+static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
+  if (OldTy == NewTy)
+    return true;
+  if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
+    return false;
+  if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
+    return false;
+
+  if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
+    if (NewTy->isPointerTy() && OldTy->isPointerTy())
+      return true;
+    if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
+      return true;
+    return false;
+  }
+
+  return true;
+}
+
+/// \brief Generic routine to convert an SSA value to a value of a different
+/// type.
+///
+/// This will try various different casting techniques, such as bitcasts,
+/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
+/// two types for viability with this routine.
+static Value *convertValue(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+                           Type *Ty) {
+  assert(canConvertValue(DL, V->getType(), Ty) &&
+         "Value not convertable to type");
+  if (V->getType() == Ty)
+    return V;
+  if (V->getType()->isIntegerTy() && Ty->isPointerTy())
+    return IRB.CreateIntToPtr(V, Ty);
+  if (V->getType()->isPointerTy() && Ty->isIntegerTy())
+    return IRB.CreatePtrToInt(V, Ty);
+
+  return IRB.CreateBitCast(V, Ty);
+}
+
 /// \brief Test whether the given alloca partition can be promoted to a vector.
 ///
 /// This is a quick test to check whether we can rewrite a particular alloca
@@ -2075,47 +2123,74 @@ static bool isVectorPromotionViable(const DataLayout &TD,
   return true;
 }
 
-/// \brief Test whether the given alloca partition can be promoted to an int.
+/// \brief Test whether the given alloca partition's integer operations can be
+/// widened to promotable ones.
 ///
-/// This is a quick test to check whether we can rewrite a particular alloca
-/// partition (and its newly formed alloca) into an integer alloca suitable for
-/// promotion to an SSA value. We only can ensure this for a limited set of
-/// operations, and we don't want to do the rewrites unless we are confident
-/// that the result will be promotable, so we have an early test here.
-static bool isIntegerPromotionViable(const DataLayout &TD,
-                                     Type *AllocaTy,
-                                     uint64_t AllocBeginOffset,
-                                     AllocaPartitioning &P,
-                                     AllocaPartitioning::const_use_iterator I,
-                                     AllocaPartitioning::const_use_iterator E) {
-  IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
-  if (!Ty || 8*TD.getTypeStoreSize(Ty) != Ty->getBitWidth())
+/// This is a quick test to check whether we can rewrite the integer loads and
+/// stores to a particular alloca into wider loads and stores and be able to
+/// promote the resulting alloca.
+static bool isIntegerWideningViable(const DataLayout &TD,
+                                    Type *AllocaTy,
+                                    uint64_t AllocBeginOffset,
+                                    AllocaPartitioning &P,
+                                    AllocaPartitioning::const_use_iterator I,
+                                    AllocaPartitioning::const_use_iterator E) {
+  uint64_t SizeInBits = TD.getTypeSizeInBits(AllocaTy);
+
+  // Don't try to handle allocas with bit-padding.
+  if (SizeInBits != TD.getTypeStoreSizeInBits(AllocaTy))
     return false;
 
+  uint64_t Size = TD.getTypeStoreSize(AllocaTy);
+
   // Check the uses to ensure the uses are (likely) promoteable integer uses.
   // Also ensure that the alloca has a covering load or store. We don't want
-  // promote because of some other unsplittable entry (which we may make
-  // splittable later) and lose the ability to promote each element access.
+  // to widen the integer operotains only to fail to promote due to some other
+  // unsplittable entry (which we may make splittable later).
   bool WholeAllocaOp = false;
   for (; I != E; ++I) {
     if (!I->U)
       continue; // Skip dead use.
 
+    uint64_t RelBegin = I->BeginOffset - AllocBeginOffset;
+    uint64_t RelEnd = I->EndOffset - AllocBeginOffset;
+
     // We can't reasonably handle cases where the load or store extends past
     // the end of the aloca's type and into its padding.
-    if ((I->EndOffset - AllocBeginOffset) > TD.getTypeStoreSize(Ty))
+    if (RelEnd > Size)
       return false;
 
     if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
-      if (LI->isVolatile() || !LI->getType()->isIntegerTy())
+      if (LI->isVolatile())
         return false;
-      if (LI->getType() == Ty)
+      if (RelBegin == 0 && RelEnd == Size)
         WholeAllocaOp = true;
+      if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
+        if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy))
+          return false;
+        continue;
+      }
+      // Non-integer loads need to be convertible from the alloca type so that
+      // they are promotable.
+      if (RelBegin != 0 || RelEnd != Size ||
+          !canConvertValue(TD, AllocaTy, LI->getType()))
+        return false;
     } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
-      if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
+      Type *ValueTy = SI->getValueOperand()->getType();
+      if (SI->isVolatile())
         return false;
-      if (SI->getValueOperand()->getType() == Ty)
+      if (RelBegin == 0 && RelEnd == Size)
         WholeAllocaOp = true;
+      if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
+        if (ITy->getBitWidth() < TD.getTypeStoreSize(ITy))
+          return false;
+        continue;
+      }
+      // Non-integer stores need to be convertible to the alloca type so that
+      // they are promotable.
+      if (RelBegin != 0 || RelEnd != Size ||
+          !canConvertValue(TD, ValueTy, AllocaTy))
+        return false;
     } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
       if (MI->isVolatile())
         return false;
@@ -2125,6 +2200,10 @@ static bool isIntegerPromotionViable(const DataLayout &TD,
         if (!MTO.IsSplittable)
           return false;
       }
+    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->U->getUser())) {
+      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+          II->getIntrinsicID() != Intrinsic::lifetime_end)
+        return false;
     } else {
       return false;
     }
@@ -2149,6 +2228,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
   SROA &Pass;
   AllocaInst &OldAI, &NewAI;
   const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
+  Type *NewAllocaTy;
 
   // If we are rewriting an alloca partition which can be written as pure
   // vector operations, we stash extra information here. When VecTy is
@@ -2164,10 +2244,10 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
   uint64_t ElementSize;
 
   // This is a convenience and flag variable that will be null unless the new
-  // alloca has a promotion-targeted integer type due to passing
-  // isIntegerPromotionViable above. If it is non-null does, the desired
+  // alloca's integer operations should be widened to this integer type due to
+  // passing isIntegerWideningViable above. If it is non-null, the desired
   // integer type will be stored here for easy access during rewriting.
-  IntegerType *IntPromotionTy;
+  IntegerType *IntTy;
 
   // The offset of the partition user currently being rewritten.
   uint64_t BeginOffset, EndOffset;
@@ -2186,7 +2266,8 @@ public:
       OldAI(OldAI), NewAI(NewAI),
       NewAllocaBeginOffset(NewBeginOffset),
       NewAllocaEndOffset(NewEndOffset),
-      VecTy(), ElementTy(), ElementSize(), IntPromotionTy(),
+      NewAllocaTy(NewAI.getAllocatedType()),
+      VecTy(), ElementTy(), ElementSize(), IntTy(),
       BeginOffset(), EndOffset() {
   }
 
@@ -2202,9 +2283,10 @@ public:
       assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
              "Only multiple-of-8 sized vector elements are viable");
       ElementSize = VecTy->getScalarSizeInBits() / 8;
-    } else if (isIntegerPromotionViable(TD, NewAI.getAllocatedType(),
-                                        NewAllocaBeginOffset, P, I, E)) {
-      IntPromotionTy = cast<IntegerType>(NewAI.getAllocatedType());
+    } else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(),
+                                       NewAllocaBeginOffset, P, I, E)) {
+      IntTy = Type::getIntNTy(NewAI.getContext(),
+                              TD.getTypeSizeInBits(NewAI.getAllocatedType()));
     }
     bool CanSROA = true;
     for (; I != E; ++I) {
@@ -2223,6 +2305,10 @@ public:
       ElementTy = 0;
       ElementSize = 0;
     }
+    if (IntTy) {
+      assert(CanSROA);
+      IntTy = 0;
+    }
     return CanSROA;
   }
 
@@ -2239,7 +2325,8 @@ private:
 
   Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) {
     assert(BeginOffset >= NewAllocaBeginOffset);
-    APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset);
+    unsigned AS = cast<PointerType>(PointerTy)->getAddressSpace();
+    APInt Offset(TD.getPointerSizeInBits(AS), BeginOffset - NewAllocaBeginOffset);
     return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName(""));
   }
 
@@ -2286,55 +2373,56 @@ private:
 
   Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
                         uint64_t Offset) {
-    assert(IntPromotionTy && "Alloca is not an integer we can extract from");
+    assert(IntTy && "We cannot extract an integer from the alloca");
     Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                      getName(".load"));
+    V = convertValue(TD, IRB, V, IntTy);
     assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
     uint64_t RelOffset = Offset - NewAllocaBeginOffset;
     assert(TD.getTypeStoreSize(TargetTy) + RelOffset <=
-           TD.getTypeStoreSize(IntPromotionTy) &&
+           TD.getTypeStoreSize(IntTy) &&
            "Element load outside of alloca store");
     uint64_t ShAmt = 8*RelOffset;
     if (TD.isBigEndian())
-      ShAmt = 8*(TD.getTypeStoreSize(IntPromotionTy) -
+      ShAmt = 8*(TD.getTypeStoreSize(IntTy) -
                  TD.getTypeStoreSize(TargetTy) - RelOffset);
     if (ShAmt)
       V = IRB.CreateLShr(V, ShAmt, getName(".shift"));
-    if (TargetTy != IntPromotionTy) {
-      assert(TargetTy->getBitWidth() < IntPromotionTy->getBitWidth() &&
-             "Cannot extract to a larger integer!");
+    assert(TargetTy->getBitWidth() <= IntTy->getBitWidth() &&
+           "Cannot extract to a larger integer!");
+    if (TargetTy != IntTy)
       V = IRB.CreateTrunc(V, TargetTy, getName(".trunc"));
-    }
     return V;
   }
 
   StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
     IntegerType *Ty = cast<IntegerType>(V->getType());
-    if (Ty == IntPromotionTy)
-      return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
-
-    assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
+    assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
            "Cannot insert a larger integer!");
-    V = IRB.CreateZExt(V, IntPromotionTy, getName(".ext"));
+    if (Ty != IntTy)
+      V = IRB.CreateZExt(V, IntTy, getName(".ext"));
     assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
     uint64_t RelOffset = Offset - NewAllocaBeginOffset;
     assert(TD.getTypeStoreSize(Ty) + RelOffset <=
-           TD.getTypeStoreSize(IntPromotionTy) &&
+           TD.getTypeStoreSize(IntTy) &&
            "Element store outside of alloca store");
     uint64_t ShAmt = 8*RelOffset;
     if (TD.isBigEndian())
-      ShAmt = 8*(TD.getTypeStoreSize(IntPromotionTy) - TD.getTypeStoreSize(Ty)
+      ShAmt = 8*(TD.getTypeStoreSize(IntTy) - TD.getTypeStoreSize(Ty)
                  - RelOffset);
     if (ShAmt)
       V = IRB.CreateShl(V, ShAmt, getName(".shift"));
 
-    APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth()).shl(ShAmt);
-    Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI,
-                                                     NewAI.getAlignment(),
-                                                     getName(".oldload")),
-                               Mask, getName(".mask"));
-    return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")),
-                                  &NewAI, NewAI.getAlignment());
+    if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
+      APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
+      Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                         getName(".oldload"));
+      Old = convertValue(TD, IRB, Old, IntTy);
+      Old = IRB.CreateAnd(Old, Mask, getName(".mask"));
+      V = IRB.CreateOr(Old, V, getName(".insert"));
+    }
+    V = convertValue(TD, IRB, V, NewAllocaTy);
+    return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
   }
 
   void deleteIfTriviallyDead(Value *V) {
@@ -2343,15 +2431,6 @@ private:
       Pass.DeadInsts.push_back(I);
   }
 
-  Value *getValueCast(IRBuilder<> &IRB, Value *V, Type *Ty) {
-    if (V->getType()->isIntegerTy() && Ty->isPointerTy())
-      return IRB.CreateIntToPtr(V, Ty);
-    if (V->getType()->isPointerTy() && Ty->isIntegerTy())
-      return IRB.CreatePtrToInt(V, Ty);
-
-    return IRB.CreateBitCast(V, Ty);
-  }
-
   bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) {
     Value *Result;
     if (LI.getType() == VecTy->getElementType() ||
@@ -2364,7 +2443,7 @@ private:
                                      getName(".load"));
     }
     if (Result->getType() != LI.getType())
-      Result = getValueCast(IRB, Result, LI.getType());
+      Result = convertValue(TD, IRB, Result, LI.getType());
     LI.replaceAllUsesWith(Result);
     Pass.DeadInsts.push_back(&LI);
 
@@ -2390,9 +2469,23 @@ private:
 
     if (VecTy)
       return rewriteVectorizedLoadInst(IRB, LI, OldOp);
-    if (IntPromotionTy)
+    if (IntTy && LI.getType()->isIntegerTy())
       return rewriteIntegerLoad(IRB, LI);
 
+    if (BeginOffset == NewAllocaBeginOffset &&
+        canConvertValue(TD, NewAllocaTy, LI.getType())) {
+      Value *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                           LI.isVolatile(), getName(".load"));
+      Value *NewV = convertValue(TD, IRB, NewLI, LI.getType());
+      LI.replaceAllUsesWith(NewV);
+      Pass.DeadInsts.push_back(&LI);
+
+      DEBUG(dbgs() << "          to: " << *NewLI << "\n");
+      return !LI.isVolatile();
+    }
+
+    assert(!IntTy && "Invalid load found with int-op widening enabled");
+
     Value *NewPtr = getAdjustedAllocaPtr(IRB,
                                          LI.getPointerOperand()->getType());
     LI.setOperand(0, NewPtr);
@@ -2409,13 +2502,13 @@ private:
     if (V->getType() == ElementTy ||
         BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
       if (V->getType() != ElementTy)
-        V = getValueCast(IRB, V, ElementTy);
+        V = convertValue(TD, IRB, V, ElementTy);
       LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                            getName(".load"));
       V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
                                   getName(".insert"));
     } else if (V->getType() != VecTy) {
-      V = getValueCast(IRB, V, VecTy);
+      V = convertValue(TD, IRB, V, VecTy);
     }
     StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
     Pass.DeadInsts.push_back(&SI);
@@ -2442,16 +2535,31 @@ private:
 
     if (VecTy)
       return rewriteVectorizedStoreInst(IRB, SI, OldOp);
-    if (IntPromotionTy)
+    Type *ValueTy = SI.getValueOperand()->getType();
+    if (IntTy && ValueTy->isIntegerTy())
       return rewriteIntegerStore(IRB, SI);
 
     // Strip all inbounds GEPs and pointer casts to try to dig out any root
     // alloca that should be re-examined after promoting this alloca.
-    if (SI.getValueOperand()->getType()->isPointerTy())
+    if (ValueTy->isPointerTy())
       if (AllocaInst *AI = dyn_cast<AllocaInst>(SI.getValueOperand()
                                                   ->stripInBoundsOffsets()))
         Pass.PostPromotionWorklist.insert(AI);
 
+    if (BeginOffset == NewAllocaBeginOffset &&
+        canConvertValue(TD, ValueTy, NewAllocaTy)) {
+      Value *NewV = convertValue(TD, IRB, SI.getValueOperand(), NewAllocaTy);
+      StoreInst *NewSI = IRB.CreateAlignedStore(NewV, &NewAI, NewAI.getAlignment(),
+                                                SI.isVolatile());
+      (void)NewSI;
+      Pass.DeadInsts.push_back(&SI);
+
+      DEBUG(dbgs() << "          to: " << *NewSI << "\n");
+      return !SI.isVolatile();
+    }
+
+    assert(!IntTy && "Invalid store found with int-op widening enabled");
+
     Value *NewPtr = getAdjustedAllocaPtr(IRB,
                                          SI.getPointerOperand()->getType());
     SI.setOperand(1, NewPtr);
@@ -2487,10 +2595,11 @@ private:
 
     // If this doesn't map cleanly onto the alloca type, and that type isn't
     // a single value type, just emit a memset.
-    if (!VecTy && (BeginOffset != NewAllocaBeginOffset ||
-                   EndOffset != NewAllocaEndOffset ||
-                   !AllocaTy->isSingleValueType() ||
-                   !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
+    if (!VecTy && !IntTy &&
+        (BeginOffset != NewAllocaBeginOffset ||
+         EndOffset != NewAllocaEndOffset ||
+         !AllocaTy->isSingleValueType() ||
+         !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
       Type *SizeTy = II.getLength()->getType();
       Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
       CallInst *New
@@ -2508,32 +2617,24 @@ private:
     // a sensible representation for the alloca type. This is essentially
     // splatting the byte to a sufficiently wide integer, bitcasting to the
     // desired scalar type, and splatting it across any desired vector type.
+    uint64_t Size = EndOffset - BeginOffset;
     Value *V = II.getValue();
     IntegerType *VTy = cast<IntegerType>(V->getType());
-    Type *IntTy = Type::getIntNTy(VTy->getContext(),
-                                  TD.getTypeSizeInBits(ScalarTy));
-    if (TD.getTypeSizeInBits(ScalarTy) > VTy->getBitWidth())
-      V = IRB.CreateMul(IRB.CreateZExt(V, IntTy, getName(".zext")),
+    Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8);
+    if (Size*8 > VTy->getBitWidth())
+      V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")),
                         ConstantExpr::getUDiv(
-                          Constant::getAllOnesValue(IntTy),
+                          Constant::getAllOnesValue(SplatIntTy),
                           ConstantExpr::getZExt(
                             Constant::getAllOnesValue(V->getType()),
-                            IntTy)),
+                            SplatIntTy)),
                         getName(".isplat"));
-    if (V->getType() != ScalarTy) {
-      if (ScalarTy->isPointerTy())
-        V = IRB.CreateIntToPtr(V, ScalarTy);
-      else if (ScalarTy->isPrimitiveType() || ScalarTy->isVectorTy())
-        V = IRB.CreateBitCast(V, ScalarTy);
-      else if (ScalarTy->isIntegerTy())
-        llvm_unreachable("Computed different integer types with equal widths");
-      else
-        llvm_unreachable("Invalid scalar type");
-    }
 
     // If this is an element-wide memset of a vectorizable alloca, insert it.
     if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
                   EndOffset < NewAllocaEndOffset)) {
+      if (V->getType() != ScalarTy)
+        V = convertValue(TD, IRB, V, ScalarTy);
       StoreInst *Store = IRB.CreateAlignedStore(
         IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
                                                       NewAI.getAlignment(),
@@ -2546,18 +2647,20 @@ private:
       return true;
     }
 
-    // Splat to a vector if needed.
-    if (VectorType *VecTy = dyn_cast<VectorType>(AllocaTy)) {
-      VectorType *SplatSourceTy = VectorType::get(V->getType(), 1);
-      V = IRB.CreateShuffleVector(
-        IRB.CreateInsertElement(UndefValue::get(SplatSourceTy), V,
-                                IRB.getInt32(0), getName(".vsplat.insert")),
-        UndefValue::get(SplatSourceTy),
-        ConstantVector::getSplat(VecTy->getNumElements(), IRB.getInt32(0)),
-        getName(".vsplat.shuffle"));
-      assert(V->getType() == VecTy);
+    // If this is a memset on an alloca where we can widen stores, insert the
+    // set integer.
+    if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
+                  EndOffset < NewAllocaEndOffset)) {
+      assert(!II.isVolatile());
+      StoreInst *Store = insertInteger(IRB, V, BeginOffset);
+      (void)Store;
+      DEBUG(dbgs() << "          to: " << *Store << "\n");
+      return true;
     }
 
+    if (V->getType() != AllocaTy)
+      V = convertValue(TD, IRB, V, AllocaTy);
+
     Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
                                         II.isVolatile());
     (void)New;
@@ -2578,8 +2681,10 @@ private:
     const AllocaPartitioning::MemTransferOffsets &MTO
       = P.getMemTransferOffsets(II);
 
+    assert(OldPtr->getType()->isPointerTy() && "Must be a pointer type!");
+    unsigned AS = cast<PointerType>(OldPtr->getType())->getAddressSpace();
     // Compute the relative offset within the transfer.
-    unsigned IntPtrWidth = TD.getPointerSizeInBits();
+    unsigned IntPtrWidth = TD.getPointerSizeInBits(AS);
     APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin
                                                        : MTO.SourceBegin));
 
@@ -2618,9 +2723,9 @@ private:
     // If this doesn't map cleanly onto the alloca type, and that type isn't
     // a single value type, just emit a memcpy.
     bool EmitMemCpy
-      = !VecTy && (BeginOffset != NewAllocaBeginOffset ||
-                   EndOffset != NewAllocaEndOffset ||
-                   !NewAI.getAllocatedType()->isSingleValueType());
+      = !VecTy && !IntTy && (BeginOffset != NewAllocaBeginOffset ||
+                             EndOffset != NewAllocaEndOffset ||
+                             !NewAI.getAllocatedType()->isSingleValueType());
 
     // If we're just going to emit a memcpy, the alloca hasn't changed, and the
     // size hasn't been shrunk based on analysis of the viable range, this is
@@ -2642,14 +2747,23 @@ private:
     if (Pass.DeadSplitInsts.insert(&II))
       Pass.DeadInsts.push_back(&II);
 
-    bool IsVectorElement = VecTy && (BeginOffset > NewAllocaBeginOffset ||
-                                     EndOffset < NewAllocaEndOffset);
+    bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset &&
+                         EndOffset == NewAllocaEndOffset;
+    bool IsVectorElement = VecTy && !IsWholeAlloca;
+    uint64_t Size = EndOffset - BeginOffset;
+    IntegerType *SubIntTy
+      = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
 
     Type *OtherPtrTy = IsDest ? II.getRawSource()->getType()
                               : II.getRawDest()->getType();
-    if (!EmitMemCpy)
-      OtherPtrTy = IsVectorElement ? VecTy->getElementType()->getPointerTo()
-                                   : NewAI.getType();
+    if (!EmitMemCpy) {
+      if (IsVectorElement)
+        OtherPtrTy = VecTy->getElementType()->getPointerTo();
+      else if (IntTy && !IsWholeAlloca)
+        OtherPtrTy = SubIntTy->getPointerTo();
+      else
+        OtherPtrTy = NewAI.getType();
+    }
 
     // Compute the other pointer, folding as much as possible to produce
     // a single, simple GEP in most cases.
@@ -2696,11 +2810,20 @@ private:
         IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
         getIndex(IRB, BeginOffset),
         getName(".copyextract"));
+    } else if (IntTy && !IsWholeAlloca && !IsDest) {
+      Src = extractInteger(IRB, SubIntTy, BeginOffset);
     } else {
       Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
                                   getName(".copyload"));
     }
 
+    if (IntTy && !IsWholeAlloca && IsDest) {
+      StoreInst *Store = insertInteger(IRB, Src, BeginOffset);
+      (void)Store;
+      DEBUG(dbgs() << "          to: " << *Store << "\n");
+      return true;
+    }
+
     if (IsVectorElement && IsDest) {
       // We have to insert into a loaded copy before storing.
       Src = IRB.CreateInsertElement(
@@ -2993,6 +3116,36 @@ private:
 };
 }
 
+/// \brief Strip aggregate type wrapping.
+///
+/// This removes no-op aggregate types wrapping an underlying type. It will
+/// strip as many layers of types as it can without changing either the type
+/// size or the allocated size.
+static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
+  if (Ty->isSingleValueType())
+    return Ty;
+
+  uint64_t AllocSize = DL.getTypeAllocSize(Ty);
+  uint64_t TypeSize = DL.getTypeSizeInBits(Ty);
+
+  Type *InnerTy;
+  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
+    InnerTy = ArrTy->getElementType();
+  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+    const StructLayout *SL = DL.getStructLayout(STy);
+    unsigned Index = SL->getElementContainingOffset(0);
+    InnerTy = STy->getElementType(Index);
+  } else {
+    return Ty;
+  }
+
+  if (AllocSize > DL.getTypeAllocSize(InnerTy) ||
+      TypeSize > DL.getTypeSizeInBits(InnerTy))
+    return Ty;
+
+  return stripAggregateTypeWrapping(DL, InnerTy);
+}
+
 /// \brief Try to find a partition of the aggregate type passed in for a given
 /// offset and size.
 ///
@@ -3009,7 +3162,7 @@ private:
 static Type *getTypePartition(const DataLayout &TD, Type *Ty,
                               uint64_t Offset, uint64_t Size) {
   if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size)
-    return Ty;
+    return stripAggregateTypeWrapping(TD, Ty);
 
   if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
     // We can't partition pointers...
@@ -3038,7 +3191,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
     assert(Offset == 0);
 
     if (Size == ElementSize)
-      return ElementTy;
+      return stripAggregateTypeWrapping(TD, ElementTy);
     assert(Size > ElementSize);
     uint64_t NumElements = Size / ElementSize;
     if (NumElements * ElementSize != Size)
@@ -3074,7 +3227,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
   assert(Offset == 0);
 
   if (Size == ElementSize)
-    return ElementTy;
+    return stripAggregateTypeWrapping(TD, ElementTy);
 
   StructType::element_iterator EI = STy->element_begin() + Index,
                                EE = STy->element_end();
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 73f53b7cec..e751750986 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -133,295 +133,7 @@ static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) {
 // String and Memory LibCall Optimizations
 //===----------------------------------------------------------------------===//
 
-//===---------------------------------------===//
-// 'strcat' Optimizations
 namespace {
-struct StrCatOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Verify the "strcat" function prototype.
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 ||
-        FT->getReturnType() != B.getInt8PtrTy() ||
-        FT->getParamType(0) != FT->getReturnType() ||
-        FT->getParamType(1) != FT->getReturnType())
-      return 0;
-
-    // Extract some information from the instruction
-    Value *Dst = CI->getArgOperand(0);
-    Value *Src = CI->getArgOperand(1);
-
-    // See if we can get the length of the input string.
-    uint64_t Len = GetStringLength(Src);
-    if (Len == 0) return 0;
-    --Len;  // Unbias length.
-
-    // Handle the simple, do-nothing case: strcat(x, "") -> x
-    if (Len == 0)
-      return Dst;
-
-    // These optimizations require DataLayout.
-    if (!TD) return 0;
-
-    return EmitStrLenMemCpy(Src, Dst, Len, B);
-  }
-
-  Value *EmitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len, IRBuilder<> &B) {
-    // We need to find the end of the destination string.  That's where the
-    // memory is to be moved to. We just generate a call to strlen.
-    Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
-    if (!DstLen)
-      return 0;
-
-    // Now that we have the destination's length, we must index into the
-    // destination's pointer to get the actual memcpy destination (end of
-    // the string .. we're concatenating).
-    Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
-
-    // We have enough information to now generate the memcpy call to do the
-    // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
-    B.CreateMemCpy(CpyDst, Src,
-                   ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
-    return Dst;
-  }
-};
-
-//===---------------------------------------===//
-// 'strncat' Optimizations
-
-struct StrNCatOpt : public StrCatOpt {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Verify the "strncat" function prototype.
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 3 ||
-        FT->getReturnType() != B.getInt8PtrTy() ||
-        FT->getParamType(0) != FT->getReturnType() ||
-        FT->getParamType(1) != FT->getReturnType() ||
-        !FT->getParamType(2)->isIntegerTy())
-      return 0;
-
-    // Extract some information from the instruction
-    Value *Dst = CI->getArgOperand(0);
-    Value *Src = CI->getArgOperand(1);
-    uint64_t Len;
-
-    // We don't do anything if length is not constant
-    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
-      Len = LengthArg->getZExtValue();
-    else
-      return 0;
-
-    // See if we can get the length of the input string.
-    uint64_t SrcLen = GetStringLength(Src);
-    if (SrcLen == 0) return 0;
-    --SrcLen;  // Unbias length.
-
-    // Handle the simple, do-nothing cases:
-    // strncat(x, "", c) -> x
-    // strncat(x,  c, 0) -> x
-    if (SrcLen == 0 || Len == 0) return Dst;
-
-    // These optimizations require DataLayout.
-    if (!TD) return 0;
-
-    // We don't optimize this case
-    if (Len < SrcLen) return 0;
-
-    // strncat(x, s, c) -> strcat(x, s)
-    // s is constant so the strcat can be optimized further
-    return EmitStrLenMemCpy(Src, Dst, SrcLen, B);
-  }
-};
-
-//===---------------------------------------===//
-// 'strchr' Optimizations
-
-struct StrChrOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Verify the "strchr" function prototype.
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 ||
-        FT->getReturnType() != B.getInt8PtrTy() ||
-        FT->getParamType(0) != FT->getReturnType() ||
-        !FT->getParamType(1)->isIntegerTy(32))
-      return 0;
-
-    Value *SrcStr = CI->getArgOperand(0);
-
-    // If the second operand is non-constant, see if we can compute the length
-    // of the input string and turn this into memchr.
-    ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
-    if (CharC == 0) {
-      // These optimizations require DataLayout.
-      if (!TD) return 0;
-
-      uint64_t Len = GetStringLength(SrcStr);
-      if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
-        return 0;
-
-      return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
-                        ConstantInt::get(TD->getIntPtrType(*Context), Len),
-                        B, TD, TLI);
-    }
-
-    // Otherwise, the character is a constant, see if the first argument is
-    // a string literal.  If so, we can constant fold.
-    StringRef Str;
-    if (!getConstantStringInfo(SrcStr, Str))
-      return 0;
-
-    // Compute the offset, make sure to handle the case when we're searching for
-    // zero (a weird way to spell strlen).
-    size_t I = CharC->getSExtValue() == 0 ?
-        Str.size() : Str.find(CharC->getSExtValue());
-    if (I == StringRef::npos) // Didn't find the char.  strchr returns null.
-      return Constant::getNullValue(CI->getType());
-
-    // strchr(s+n,c)  -> gep(s+n+i,c)
-    return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
-  }
-};
-
-//===---------------------------------------===//
-// 'strrchr' Optimizations
-
-struct StrRChrOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Verify the "strrchr" function prototype.
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 ||
-        FT->getReturnType() != B.getInt8PtrTy() ||
-        FT->getParamType(0) != FT->getReturnType() ||
-        !FT->getParamType(1)->isIntegerTy(32))
-      return 0;
-
-    Value *SrcStr = CI->getArgOperand(0);
-    ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
-
-    // Cannot fold anything if we're not looking for a constant.
-    if (!CharC)
-      return 0;
-
-    StringRef Str;
-    if (!getConstantStringInfo(SrcStr, Str)) {
-      // strrchr(s, 0) -> strchr(s, 0)
-      if (TD && CharC->isZero())
-        return EmitStrChr(SrcStr, '\0', B, TD, TLI);
-      return 0;
-    }
-
-    // Compute the offset.
-    size_t I = CharC->getSExtValue() == 0 ?
-        Str.size() : Str.rfind(CharC->getSExtValue());
-    if (I == StringRef::npos) // Didn't find the char. Return null.
-      return Constant::getNullValue(CI->getType());
-
-    // strrchr(s+n,c) -> gep(s+n+i,c)
-    return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
-  }
-};
-
-//===---------------------------------------===//
-// 'strcmp' Optimizations
-
-struct StrCmpOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Verify the "strcmp" function prototype.
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 2 ||
-        !FT->getReturnType()->isIntegerTy(32) ||
-        FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != B.getInt8PtrTy())
-      return 0;
-
-    Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
-    if (Str1P == Str2P)      // strcmp(x,x)  -> 0
-      return ConstantInt::get(CI->getType(), 0);
-
-    StringRef Str1, Str2;
-    bool HasStr1 = getConstantStringInfo(Str1P, Str1);
-    bool HasStr2 = getConstantStringInfo(Str2P, Str2);
-
-    // strcmp(x, y)  -> cnst  (if both x and y are constant strings)
-    if (HasStr1 && HasStr2)
-      return ConstantInt::get(CI->getType(), Str1.compare(Str2));
-
-    if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
-      return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
-                                      CI->getType()));
-
-    if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
-      return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
-    // strcmp(P, "x") -> memcmp(P, "x", 2)
-    uint64_t Len1 = GetStringLength(Str1P);
-    uint64_t Len2 = GetStringLength(Str2P);
-    if (Len1 && Len2) {
-      // These optimizations require DataLayout.
-      if (!TD) return 0;
-
-      return EmitMemCmp(Str1P, Str2P,
-                        ConstantInt::get(TD->getIntPtrType(*Context),
-                        std::min(Len1, Len2)), B, TD, TLI);
-    }
-
-    return 0;
-  }
-};
-
-//===---------------------------------------===//
-// 'strncmp' Optimizations
-
-struct StrNCmpOpt : public LibCallOptimization {
-  virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
-    // Verify the "strncmp" function prototype.
-    FunctionType *FT = Callee->getFunctionType();
-    if (FT->getNumParams() != 3 ||
-        !FT->getReturnType()->isIntegerTy(32) ||
-        FT->getParamType(0) != FT->getParamType(1) ||
-        FT->getParamType(0) != B.getInt8PtrTy() ||
-        !FT->getParamType(2)->isIntegerTy())
-      return 0;
-
-    Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
-    if (Str1P == Str2P)      // strncmp(x,x,n)  -> 0
-      return ConstantInt::get(CI->getType(), 0);
-
-    // Get the length argument if it is constant.
-    uint64_t Length;
-    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
-      Length = LengthArg->getZExtValue();
-    else
-      return 0;
-
-    if (Length == 0) // strncmp(x,y,0)   -> 0
-      return ConstantInt::get(CI->getType(), 0);
-
-    if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
-      return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
-
-    StringRef Str1, Str2;
-    bool HasStr1 = getConstantStringInfo(Str1P, Str1);
-    bool HasStr2 = getConstantStringInfo(Str2P, Str2);
-
-    // strncmp(x, y)  -> cnst  (if both x and y are constant strings)
-    if (HasStr1 && HasStr2) {
-      StringRef SubStr1 = Str1.substr(0, Length);
-      StringRef SubStr2 = Str2.substr(0, Length);
-      return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
-    }
-
-    if (HasStr1 && Str1.empty())  // strncmp("", x, n) -> -*x
-      return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
-                                      CI->getType()));
-
-    if (HasStr2 && Str2.empty())  // strncmp(x, "", n) -> *x
-      return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
-
-    return 0;
-  }
-};
-
-
 //===---------------------------------------===//
 // 'strcpy' Optimizations
 
@@ -638,7 +350,7 @@ struct StrToOpt : public LibCallOptimization {
       // It would be readonly too, except that it still may write to errno.
       Attributes::Builder B;
       B.addAttribute(Attributes::NoCapture);
-      CI->addAttribute(1, Attributes::get(B));
+      CI->addAttribute(1, Attributes::get(Callee->getContext(), B));
     }
 
     return 0;
@@ -1564,8 +1276,6 @@ namespace {
 
     StringMap<LibCallOptimization*> Optimizations;
     // String and Memory LibCall Optimizations
-    StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr;
-    StrCmpOpt StrCmp; StrNCmpOpt StrNCmp;
     StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
     StpCpyOpt StpCpy; StpCpyOpt StpCpyChk;
     StrNCpyOpt StrNCpy;
@@ -1639,12 +1349,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
 /// we know.
 void SimplifyLibCalls::InitOptimizations() {
   // String and Memory LibCall Optimizations
-  Optimizations["strcat"] = &StrCat;
-  Optimizations["strncat"] = &StrNCat;
-  Optimizations["strchr"] = &StrChr;
-  Optimizations["strrchr"] = &StrRChr;
-  Optimizations["strcmp"] = &StrCmp;
-  Optimizations["strncmp"] = &StrNCmp;
   Optimizations["strcpy"] = &StrCpy;
   Optimizations["strncpy"] = &StrNCpy;
   Optimizations["stpcpy"] = &StpCpy;
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 26240d4dfe..fa2faa2dad 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -41,9 +41,10 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
+  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
   Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI[1] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                                   ArrayRef<Attributes::AttrVal>(AVs, 2));
 
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Constant *StrLen = M->getOrInsertFunction("strlen", AttrListPtr::get(AWI),
@@ -67,9 +68,10 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
+  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
   Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI[1] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                                   ArrayRef<Attributes::AttrVal>(AVs, 2));
 
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Constant *StrNLen = M->getOrInsertFunction("strnlen", AttrListPtr::get(AWI),
@@ -95,7 +97,8 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
   AttributeWithIndex AWI =
-    AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+    AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                            ArrayRef<Attributes::AttrVal>(AVs, 2));
 
   Type *I8Ptr = B.getInt8PtrTy();
   Type *I32Ty = B.getInt32Ty();
@@ -117,10 +120,11 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[3];
-  AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(2, Attributes::NoCapture);
+  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+  AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
   Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI[2] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+  AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                                   ArrayRef<Attributes::AttrVal>(AVs, 2));
 
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI),
@@ -147,8 +151,9 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(2, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+  AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                                   Attributes::NoUnwind);
   Type *I8Ptr = B.getInt8PtrTy();
   Value *StrCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI),
                                          I8Ptr, I8Ptr, I8Ptr, NULL);
@@ -169,8 +174,9 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(2, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+  AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                                   Attributes::NoUnwind);
   Type *I8Ptr = B.getInt8PtrTy();
   Value *StrNCpy = M->getOrInsertFunction(Name, AttrListPtr::get(AWI),
                                           I8Ptr, I8Ptr, I8Ptr,
@@ -193,7 +199,8 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI;
-  AWI = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+  AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                                Attributes::NoUnwind);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
                                          AttrListPtr::get(AWI),
@@ -221,7 +228,8 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI;
   Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+  AWI = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                                ArrayRef<Attributes::AttrVal>(AVs, 2));
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Value *MemChr = M->getOrInsertFunction("memchr", AttrListPtr::get(AWI),
                                          B.getInt8PtrTy(),
@@ -246,10 +254,11 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[3];
-  AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(2, Attributes::NoCapture);
+  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+  AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
   Attributes::AttrVal AVs[2] = { Attributes::ReadOnly, Attributes::NoUnwind };
-  AWI[2] = AttributeWithIndex::get(~0u, ArrayRef<Attributes::AttrVal>(AVs, 2));
+  AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                                   ArrayRef<Attributes::AttrVal>(AVs, 2));
 
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   Value *MemCmp = M->getOrInsertFunction("memcmp", AttrListPtr::get(AWI),
@@ -325,8 +334,9 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                                   Attributes::NoUnwind);
 
   Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI),
                                        B.getInt32Ty(),
@@ -347,8 +357,9 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[2];
-  AWI[0] = AttributeWithIndex::get(2, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+  AWI[0] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+  AWI[1] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                                   Attributes::NoUnwind);
   Constant *F;
   if (File->getType()->isPointerTy())
     F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI),
@@ -378,9 +389,10 @@ Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[3];
-  AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(2, Attributes::NoCapture);
-  AWI[2] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+  AWI[1] = AttributeWithIndex::get(M->getContext(), 2, Attributes::NoCapture);
+  AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                                   Attributes::NoUnwind);
   StringRef FPutsName = TLI->getName(LibFunc::fputs);
   Constant *F;
   if (File->getType()->isPointerTy())
@@ -409,9 +421,10 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
 
   Module *M = B.GetInsertBlock()->getParent()->getParent();
   AttributeWithIndex AWI[3];
-  AWI[0] = AttributeWithIndex::get(1, Attributes::NoCapture);
-  AWI[1] = AttributeWithIndex::get(4, Attributes::NoCapture);
-  AWI[2] = AttributeWithIndex::get(~0u, Attributes::NoUnwind);
+  AWI[0] = AttributeWithIndex::get(M->getContext(), 1, Attributes::NoCapture);
+  AWI[1] = AttributeWithIndex::get(M->getContext(), 4, Attributes::NoCapture);
+  AWI[2] = AttributeWithIndex::get(M->getContext(), AttrListPtr::FunctionIndex,
+                                   Attributes::NoUnwind);
   LLVMContext &Context = B.GetInsertBlock()->getContext();
   StringRef FWriteName = TLI->getName(LibFunc::fwrite);
   Constant *F;
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index c3f72b13af..620209bccb 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -28,6 +28,7 @@ add_llvm_library(LLVMTransformUtils
   SimplifyCFG.cpp
   SimplifyIndVar.cpp
   SimplifyInstructions.cpp
+  SimplifyLibCalls.cpp
   UnifyFunctionExitNodes.cpp
   Utils.cpp
   ValueMapper.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index e2932501f3..7ba9f6d9d2 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -98,10 +98,14 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
         Anew->addAttr( OldFunc->getAttributes()
                        .getParamAttributes(I->getArgNo() + 1));
     NewFunc->setAttributes(NewFunc->getAttributes()
-                           .addAttr(0, OldFunc->getAttributes()
+                           .addAttr(NewFunc->getContext(),
+                                    AttrListPtr::ReturnIndex,
+                                    OldFunc->getAttributes()
                                      .getRetAttributes()));
     NewFunc->setAttributes(NewFunc->getAttributes()
-                           .addAttr(~0, OldFunc->getAttributes()
+                           .addAttr(NewFunc->getContext(),
+                                    AttrListPtr::FunctionIndex,
+                                    OldFunc->getAttributes()
                                      .getFnAttributes()));
 
   }
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index a954d82c05..9729687a83 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -806,7 +806,8 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
                                           const DataLayout *TD) {
   assert(V->getType()->isPointerTy() &&
          "getOrEnforceKnownAlignment expects a pointer!");
-  unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
+  unsigned AS = cast<PointerType>(V->getType())->getAddressSpace();
+  unsigned BitWidth = TD ? TD->getPointerSizeInBits(AS) : 64;
   APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
   ComputeMaskedBits(V, KnownZero, KnownOne, TD);
   unsigned TrailZ = KnownZero.countTrailingOnes();
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 5f8953abc0..a008da67e9 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -76,6 +76,8 @@ namespace {
       // Comparing pointers is ok as we only rely on the order for uniquing.
       return Value < RHS.Value;
     }
+
+    bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
   };
 
 class SimplifyCFGOpt {
@@ -564,11 +566,7 @@ GetValueEqualityComparisonCases(TerminatorInst *TI,
 /// in the list that match the specified block.
 static void EliminateBlockCases(BasicBlock *BB,
                               std::vector<ValueEqualityComparisonCase> &Cases) {
-  for (unsigned i = 0, e = Cases.size(); i != e; ++i)
-    if (Cases[i].Dest == BB) {
-      Cases.erase(Cases.begin()+i);
-      --i; --e;
-    }
+  Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
 }
 
 /// ValuesOverlap - Return true if there are any keys in C1 that exist in C2 as
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
new file mode 100644
index 0000000000..bd28ec3527
--- /dev/null
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -0,0 +1,579 @@
+//===------ SimplifyLibCalls.cpp - Library calls simplifier ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a utility pass used for testing the InstructionSimplify analysis.
+// The analysis is applied to every instruction, and if it simplifies then the
+// instruction is replaced by the simplification.  If you are looking for a pass
+// that performs serious instruction folding, use the instcombine pass instead.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/DataLayout.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BuildLibCalls.h"
+
+using namespace llvm;
+
+/// This class is the abstract base class for the set of optimizations that
+/// corresponds to one library call.
+namespace {
+class LibCallOptimization {
+protected:
+  Function *Caller;
+  const DataLayout *TD;
+  const TargetLibraryInfo *TLI;
+  LLVMContext* Context;
+public:
+  LibCallOptimization() { }
+  virtual ~LibCallOptimization() {}
+
+  /// callOptimizer - This pure virtual method is implemented by base classes to
+  /// do various optimizations.  If this returns null then no transformation was
+  /// performed.  If it returns CI, then it transformed the call and CI is to be
+  /// deleted.  If it returns something else, replace CI with the new value and
+  /// delete CI.
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B)
+    =0;
+
+  Value *optimizeCall(CallInst *CI, const DataLayout *TD,
+                      const TargetLibraryInfo *TLI, IRBuilder<> &B) {
+    Caller = CI->getParent()->getParent();
+    this->TD = TD;
+    this->TLI = TLI;
+    if (CI->getCalledFunction())
+      Context = &CI->getCalledFunction()->getContext();
+
+    // We never change the calling convention.
+    if (CI->getCallingConv() != llvm::CallingConv::C)
+      return NULL;
+
+    return callOptimizer(CI->getCalledFunction(), CI, B);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Fortified Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct FortifiedLibCallOptimization : public LibCallOptimization {
+protected:
+  virtual bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp,
+			  bool isString) const = 0;
+};
+
+struct InstFortifiedLibCallOptimization : public FortifiedLibCallOptimization {
+  CallInst *CI;
+
+  bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
+    if (CI->getArgOperand(SizeCIOp) == CI->getArgOperand(SizeArgOp))
+      return true;
+    if (ConstantInt *SizeCI =
+                           dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
+      if (SizeCI->isAllOnesValue())
+        return true;
+      if (isString) {
+        uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp));
+        // If the length is 0 we don't know how long it is and so we can't
+        // remove the check.
+        if (Len == 0) return false;
+        return SizeCI->getZExtValue() >= Len;
+      }
+      if (ConstantInt *Arg = dyn_cast<ConstantInt>(
+                                                  CI->getArgOperand(SizeArgOp)))
+        return SizeCI->getZExtValue() >= Arg->getZExtValue();
+    }
+    return false;
+  }
+};
+
+struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    this->CI = CI;
+    FunctionType *FT = Callee->getFunctionType();
+    LLVMContext &Context = CI->getParent()->getContext();
+
+    // Check if this has the right signature.
+    if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+        !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        FT->getParamType(2) != TD->getIntPtrType(Context) ||
+        FT->getParamType(3) != TD->getIntPtrType(Context))
+      return 0;
+
+    if (isFoldable(3, 2, false)) {
+      B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                     CI->getArgOperand(2), 1);
+      return CI->getArgOperand(0);
+    }
+    return 0;
+  }
+};
+
+struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    this->CI = CI;
+    FunctionType *FT = Callee->getFunctionType();
+    LLVMContext &Context = CI->getParent()->getContext();
+
+    // Check if this has the right signature.
+    if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+        !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isPointerTy() ||
+        FT->getParamType(2) != TD->getIntPtrType(Context) ||
+        FT->getParamType(3) != TD->getIntPtrType(Context))
+      return 0;
+
+    if (isFoldable(3, 2, false)) {
+      B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+                      CI->getArgOperand(2), 1);
+      return CI->getArgOperand(0);
+    }
+    return 0;
+  }
+};
+
+struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    this->CI = CI;
+    FunctionType *FT = Callee->getFunctionType();
+    LLVMContext &Context = CI->getParent()->getContext();
+
+    // Check if this has the right signature.
+    if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+        !FT->getParamType(0)->isPointerTy() ||
+        !FT->getParamType(1)->isIntegerTy() ||
+        FT->getParamType(2) != TD->getIntPtrType(Context) ||
+        FT->getParamType(3) != TD->getIntPtrType(Context))
+      return 0;
+
+    if (isFoldable(3, 2, false)) {
+      Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
+                                   false);
+      B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
+      return CI->getArgOperand(0);
+    }
+    return 0;
+  }
+};
+
+struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    this->CI = CI;
+    StringRef Name = Callee->getName();
+    FunctionType *FT = Callee->getFunctionType();
+    LLVMContext &Context = CI->getParent()->getContext();
+
+    // Check if this has the right signature.
+    if (FT->getNumParams() != 3 ||
+        FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+        FT->getParamType(2) != TD->getIntPtrType(Context))
+      return 0;
+
+    // If a) we don't have any length information, or b) we know this will
+    // fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
+    // st[rp]cpy_chk call which may fail at runtime if the size is too long.
+    // TODO: It might be nice to get a maximum length out of the possible
+    // string lengths for varying.
+    if (isFoldable(2, 1, true)) {
+      Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
+                              TLI, Name.substr(2, 6));
+      return Ret;
+    }
+    return 0;
+  }
+};
+
+struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    this->CI = CI;
+    StringRef Name = Callee->getName();
+    FunctionType *FT = Callee->getFunctionType();
+    LLVMContext &Context = CI->getParent()->getContext();
+
+    // Check if this has the right signature.
+    if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
+        !FT->getParamType(2)->isIntegerTy() ||
+        FT->getParamType(3) != TD->getIntPtrType(Context))
+      return 0;
+
+    if (isFoldable(3, 2, false)) {
+      Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+                               CI->getArgOperand(2), B, TD, TLI,
+                               Name.substr(2, 7));
+      return Ret;
+    }
+    return 0;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// String and Memory Library Call Optimizations
+//===----------------------------------------------------------------------===//
+
+struct StrCatOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strcat" function prototype.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getReturnType() != B.getInt8PtrTy() ||
+        FT->getParamType(0) != FT->getReturnType() ||
+        FT->getParamType(1) != FT->getReturnType())
+      return 0;
+
+    // Extract some information from the instruction
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
+
+    // See if we can get the length of the input string.
+    uint64_t Len = GetStringLength(Src);
+    if (Len == 0) return 0;
+    --Len;  // Unbias length.
+
+    // Handle the simple, do-nothing case: strcat(x, "") -> x
+    if (Len == 0)
+      return Dst;
+
+    // These optimizations require DataLayout.
+    if (!TD) return 0;
+
+    return emitStrLenMemCpy(Src, Dst, Len, B);
+  }
+
+  Value *emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,
+                          IRBuilder<> &B) {
+    // We need to find the end of the destination string.  That's where the
+    // memory is to be moved to. We just generate a call to strlen.
+    Value *DstLen = EmitStrLen(Dst, B, TD, TLI);
+    if (!DstLen)
+      return 0;
+
+    // Now that we have the destination's length, we must index into the
+    // destination's pointer to get the actual memcpy destination (end of
+    // the string .. we're concatenating).
+    Value *CpyDst = B.CreateGEP(Dst, DstLen, "endptr");
+
+    // We have enough information to now generate the memcpy call to do the
+    // concatenation for us.  Make a memcpy to copy the nul byte with align = 1.
+    B.CreateMemCpy(CpyDst, Src,
+                   ConstantInt::get(TD->getIntPtrType(*Context), Len + 1), 1);
+    return Dst;
+  }
+};
+
+struct StrNCatOpt : public StrCatOpt {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strncat" function prototype.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 3 ||
+        FT->getReturnType() != B.getInt8PtrTy() ||
+        FT->getParamType(0) != FT->getReturnType() ||
+        FT->getParamType(1) != FT->getReturnType() ||
+        !FT->getParamType(2)->isIntegerTy())
+      return 0;
+
+    // Extract some information from the instruction
+    Value *Dst = CI->getArgOperand(0);
+    Value *Src = CI->getArgOperand(1);
+    uint64_t Len;
+
+    // We don't do anything if length is not constant
+    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+      Len = LengthArg->getZExtValue();
+    else
+      return 0;
+
+    // See if we can get the length of the input string.
+    uint64_t SrcLen = GetStringLength(Src);
+    if (SrcLen == 0) return 0;
+    --SrcLen;  // Unbias length.
+
+    // Handle the simple, do-nothing cases:
+    // strncat(x, "", c) -> x
+    // strncat(x,  c, 0) -> x
+    if (SrcLen == 0 || Len == 0) return Dst;
+
+    // These optimizations require DataLayout.
+    if (!TD) return 0;
+
+    // We don't optimize this case
+    if (Len < SrcLen) return 0;
+
+    // strncat(x, s, c) -> strcat(x, s)
+    // s is constant so the strcat can be optimized further
+    return emitStrLenMemCpy(Src, Dst, SrcLen, B);
+  }
+};
+
+struct StrChrOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strchr" function prototype.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getReturnType() != B.getInt8PtrTy() ||
+        FT->getParamType(0) != FT->getReturnType() ||
+        !FT->getParamType(1)->isIntegerTy(32))
+      return 0;
+
+    Value *SrcStr = CI->getArgOperand(0);
+
+    // If the second operand is non-constant, see if we can compute the length
+    // of the input string and turn this into memchr.
+    ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+    if (CharC == 0) {
+      // These optimizations require DataLayout.
+      if (!TD) return 0;
+
+      uint64_t Len = GetStringLength(SrcStr);
+      if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
+        return 0;
+
+      return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
+                        ConstantInt::get(TD->getIntPtrType(*Context), Len),
+                        B, TD, TLI);
+    }
+
+    // Otherwise, the character is a constant, see if the first argument is
+    // a string literal.  If so, we can constant fold.
+    StringRef Str;
+    if (!getConstantStringInfo(SrcStr, Str))
+      return 0;
+
+    // Compute the offset, make sure to handle the case when we're searching for
+    // zero (a weird way to spell strlen).
+    size_t I = CharC->getSExtValue() == 0 ?
+        Str.size() : Str.find(CharC->getSExtValue());
+    if (I == StringRef::npos) // Didn't find the char.  strchr returns null.
+      return Constant::getNullValue(CI->getType());
+
+    // strchr(s+n,c)  -> gep(s+n+i,c)
+    return B.CreateGEP(SrcStr, B.getInt64(I), "strchr");
+  }
+};
+
+struct StrRChrOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strrchr" function prototype.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        FT->getReturnType() != B.getInt8PtrTy() ||
+        FT->getParamType(0) != FT->getReturnType() ||
+        !FT->getParamType(1)->isIntegerTy(32))
+      return 0;
+
+    Value *SrcStr = CI->getArgOperand(0);
+    ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+
+    // Cannot fold anything if we're not looking for a constant.
+    if (!CharC)
+      return 0;
+
+    StringRef Str;
+    if (!getConstantStringInfo(SrcStr, Str)) {
+      // strrchr(s, 0) -> strchr(s, 0)
+      if (TD && CharC->isZero())
+        return EmitStrChr(SrcStr, '\0', B, TD, TLI);
+      return 0;
+    }
+
+    // Compute the offset.
+    size_t I = CharC->getSExtValue() == 0 ?
+        Str.size() : Str.rfind(CharC->getSExtValue());
+    if (I == StringRef::npos) // Didn't find the char. Return null.
+      return Constant::getNullValue(CI->getType());
+
+    // strrchr(s+n,c) -> gep(s+n+i,c)
+    return B.CreateGEP(SrcStr, B.getInt64(I), "strrchr");
+  }
+};
+
+struct StrCmpOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strcmp" function prototype.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 2 ||
+        !FT->getReturnType()->isIntegerTy(32) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != B.getInt8PtrTy())
+      return 0;
+
+    Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+    if (Str1P == Str2P)      // strcmp(x,x)  -> 0
+      return ConstantInt::get(CI->getType(), 0);
+
+    StringRef Str1, Str2;
+    bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+    bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+    // strcmp(x, y)  -> cnst  (if both x and y are constant strings)
+    if (HasStr1 && HasStr2)
+      return ConstantInt::get(CI->getType(), Str1.compare(Str2));
+
+    if (HasStr1 && Str1.empty()) // strcmp("", x) -> -*x
+      return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+                                      CI->getType()));
+
+    if (HasStr2 && Str2.empty()) // strcmp(x,"") -> *x
+      return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+    // strcmp(P, "x") -> memcmp(P, "x", 2)
+    uint64_t Len1 = GetStringLength(Str1P);
+    uint64_t Len2 = GetStringLength(Str2P);
+    if (Len1 && Len2) {
+      // These optimizations require DataLayout.
+      if (!TD) return 0;
+
+      return EmitMemCmp(Str1P, Str2P,
+                        ConstantInt::get(TD->getIntPtrType(*Context),
+                        std::min(Len1, Len2)), B, TD, TLI);
+    }
+
+    return 0;
+  }
+};
+
+struct StrNCmpOpt : public LibCallOptimization {
+  virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+    // Verify the "strncmp" function prototype.
+    FunctionType *FT = Callee->getFunctionType();
+    if (FT->getNumParams() != 3 ||
+        !FT->getReturnType()->isIntegerTy(32) ||
+        FT->getParamType(0) != FT->getParamType(1) ||
+        FT->getParamType(0) != B.getInt8PtrTy() ||
+        !FT->getParamType(2)->isIntegerTy())
+      return 0;
+
+    Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+    if (Str1P == Str2P)      // strncmp(x,x,n)  -> 0
+      return ConstantInt::get(CI->getType(), 0);
+
+    // Get the length argument if it is constant.
+    uint64_t Length;
+    if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
+      Length = LengthArg->getZExtValue();
+    else
+      return 0;
+
+    if (Length == 0) // strncmp(x,y,0)   -> 0
+      return ConstantInt::get(CI->getType(), 0);
+
+    if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+      return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD, TLI);
+
+    StringRef Str1, Str2;
+    bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+    bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+    // strncmp(x, y)  -> cnst  (if both x and y are constant strings)
+    if (HasStr1 && HasStr2) {
+      StringRef SubStr1 = Str1.substr(0, Length);
+      StringRef SubStr2 = Str2.substr(0, Length);
+      return ConstantInt::get(CI->getType(), SubStr1.compare(SubStr2));
+    }
+
+    if (HasStr1 && Str1.empty())  // strncmp("", x, n) -> -*x
+      return B.CreateNeg(B.CreateZExt(B.CreateLoad(Str2P, "strcmpload"),
+                                      CI->getType()));
+
+    if (HasStr2 && Str2.empty())  // strncmp(x, "", n) -> *x
+      return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
+
+    return 0;
+  }
+};
+
+} // End anonymous namespace.
+
+namespace llvm {
+
+class LibCallSimplifierImpl {
+  const DataLayout *TD;
+  const TargetLibraryInfo *TLI;
+  StringMap<LibCallOptimization*> Optimizations;
+
+  // Fortified library call optimizations.
+  MemCpyChkOpt MemCpyChk;
+  MemMoveChkOpt MemMoveChk;
+  MemSetChkOpt MemSetChk;
+  StrCpyChkOpt StrCpyChk;
+  StrNCpyChkOpt StrNCpyChk;
+
+  // String and memory library call optimizations.
+  StrCatOpt StrCat;
+  StrNCatOpt StrNCat;
+  StrChrOpt StrChr;
+  StrRChrOpt StrRChr;
+  StrCmpOpt StrCmp;
+  StrNCmpOpt StrNCmp;
+
+  void initOptimizations();
+public:
+  LibCallSimplifierImpl(const DataLayout *TD, const TargetLibraryInfo *TLI) {
+    this->TD = TD;
+    this->TLI = TLI;
+  }
+
+  Value *optimizeCall(CallInst *CI);
+};
+
+void LibCallSimplifierImpl::initOptimizations() {
+  // Fortified library call optimizations.
+  Optimizations["__memcpy_chk"] = &MemCpyChk;
+  Optimizations["__memmove_chk"] = &MemMoveChk;
+  Optimizations["__memset_chk"] = &MemSetChk;
+  Optimizations["__strcpy_chk"] = &StrCpyChk;
+  Optimizations["__stpcpy_chk"] = &StrCpyChk;
+  Optimizations["__strncpy_chk"] = &StrNCpyChk;
+  Optimizations["__stpncpy_chk"] = &StrNCpyChk;
+  Optimizations["strcmp"] = &StrCmp;
+  Optimizations["strncmp"] = &StrNCmp;
+
+  // String and memory library call optimizations.
+  Optimizations["strcat"] = &StrCat;
+  Optimizations["strncat"] = &StrNCat;
+  Optimizations["strchr"] = &StrChr;
+  Optimizations["strrchr"] = &StrRChr;
+}
+
+Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
+  if (Optimizations.empty())
+    initOptimizations();
+
+  Function *Callee = CI->getCalledFunction();
+  LibCallOptimization *LCO = Optimizations.lookup(Callee->getName());
+  if (LCO) {
+    IRBuilder<> Builder(CI);
+    return LCO->optimizeCall(CI, TD, TLI, Builder);
+  }
+  return 0;
+}
+
+LibCallSimplifier::LibCallSimplifier(const DataLayout *TD,
+                                     const TargetLibraryInfo *TLI) {
+  Impl = new LibCallSimplifierImpl(TD, TLI);
+}
+
+LibCallSimplifier::~LibCallSimplifier() {
+  delete Impl;
+}
+
+Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
+  return Impl->optimizeCall(CI);
+}
+
+}
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index 16c5466f17..e81bf3c83a 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Attributes.h"
+#include "AttributesImpl.h"
 #include "LLVMContextImpl.h"
 #include "llvm/Type.h"
 #include "llvm/ADT/StringExtras.h"
@@ -27,19 +28,16 @@ using namespace llvm;
 // Attributes Implementation
 //===----------------------------------------------------------------------===//
 
-Attributes::Attributes(uint64_t Val) : Attrs(Val) {}
-
-Attributes::Attributes(LLVMContext &C, AttrVal Val)
-  : Attrs(Attributes::get(Attributes::Builder().addAttribute(Val)).Attrs) {}
-
-Attributes::Attributes(AttributesImpl *A) : Attrs(A->Bits) {}
+Attributes::Attributes(AttributesImpl *A) : Attrs(A) {}
 
 Attributes::Attributes(const Attributes &A) : Attrs(A.Attrs) {}
 
-// FIXME: This is temporary until we have implemented the uniquified version of
-// AttributesImpl.
-Attributes Attributes::get(Attributes::Builder &B) {
-  return Attributes(B.Bits);
+Attributes Attributes::get(LLVMContext &Context, ArrayRef<AttrVal> Vals) {
+  Attributes::Builder B;
+  for (ArrayRef<AttrVal>::iterator I = Vals.begin(), E = Vals.end();
+       I != E; ++I)
+    B.addAttribute(*I);
+  return Attributes::get(Context, B);
 }
 
 Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) {
@@ -67,18 +65,22 @@ Attributes Attributes::get(LLVMContext &Context, Attributes::Builder &B) {
 }
 
 bool Attributes::hasAttribute(AttrVal Val) const {
-  return Attrs.hasAttribute(Val);
+  return Attrs && Attrs->hasAttribute(Val);
+}
+
+bool Attributes::hasAttributes() const {
+  return Attrs && Attrs->hasAttributes();
 }
 
 bool Attributes::hasAttributes(const Attributes &A) const {
-  return Attrs.hasAttributes(A);
+  return Attrs && Attrs->hasAttributes(A);
 }
 
 /// This returns the alignment field of an attribute as a byte alignment value.
 unsigned Attributes::getAlignment() const {
   if (!hasAttribute(Attributes::Alignment))
     return 0;
-  return 1U << ((Attrs.getAlignment() >> 16) - 1);
+  return 1U << ((Attrs->getAlignment() >> 16) - 1);
 }
 
 /// This returns the stack alignment field of an attribute as a byte alignment
@@ -86,36 +88,11 @@ unsigned Attributes::getAlignment() const {
 unsigned Attributes::getStackAlignment() const {
   if (!hasAttribute(Attributes::StackAlignment))
     return 0;
-  return 1U << ((Attrs.getStackAlignment() >> 26) - 1);
-}
-
-bool Attributes::isEmptyOrSingleton() const {
-  return Attrs.isEmptyOrSingleton();
-}
-
-Attributes Attributes::operator | (const Attributes &A) const {
-  return Attributes(Raw() | A.Raw());
-}
-Attributes Attributes::operator & (const Attributes &A) const {
-  return Attributes(Raw() & A.Raw());
-}
-Attributes Attributes::operator ^ (const Attributes &A) const {
-  return Attributes(Raw() ^ A.Raw());
-}
-Attributes &Attributes::operator |= (const Attributes &A) {
-  Attrs.Bits |= A.Raw();
-  return *this;
-}
-Attributes &Attributes::operator &= (const Attributes &A) {
-  Attrs.Bits &= A.Raw();
-  return *this;
-}
-Attributes Attributes::operator ~ () const {
-  return Attributes(~Raw());
+  return 1U << ((Attrs->getStackAlignment() >> 26) - 1);
 }
 
 uint64_t Attributes::Raw() const {
-  return Attrs.Bits;
+  return Attrs ? Attrs->Bits : 0; // FIXME: Don't access this directly!
 }
 
 Attributes Attributes::typeIncompatible(Type *Ty) {
@@ -134,7 +111,7 @@ Attributes Attributes::typeIncompatible(Type *Ty) {
       .addAttribute(Attributes::NoCapture)
       .addAttribute(Attributes::StructRet);
   
-  return Attributes(Incompatible.Bits); // FIXME: Use Attributes::get().
+  return Attributes::get(Ty->getContext(), Incompatible);
 }
 
 std::string Attributes::getAsString() const {
@@ -209,24 +186,30 @@ std::string Attributes::getAsString() const {
 // Attributes::Builder Implementation
 //===----------------------------------------------------------------------===//
 
-Attributes::Builder &Attributes::Builder::
-addAttribute(Attributes::AttrVal Val) {
+Attributes::Builder &Attributes::Builder::addAttribute(Attributes::AttrVal Val){
   Bits |= AttributesImpl::getAttrMask(Val);
   return *this;
 }
 
-void Attributes::Builder::addAlignmentAttr(unsigned Align) {
-  if (Align == 0) return;
+Attributes::Builder &Attributes::Builder::addRawValue(uint64_t Val) {
+  Bits |= Val;
+  return *this;
+}
+
+Attributes::Builder &Attributes::Builder::addAlignmentAttr(unsigned Align) {
+  if (Align == 0) return *this;
   assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
   assert(Align <= 0x40000000 && "Alignment too large.");
   Bits |= (Log2_32(Align) + 1) << 16;
+  return *this;
 }
-void Attributes::Builder::addStackAlignmentAttr(unsigned Align) {
+Attributes::Builder &Attributes::Builder::addStackAlignmentAttr(unsigned Align){
   // Default alignment, allow the target to define how to align it.
-  if (Align == 0) return;
+  if (Align == 0) return *this;
   assert(isPowerOf2_32(Align) && "Alignment must be a power of two.");
   assert(Align <= 0x100 && "Alignment too large.");
   Bits |= (Log2_32(Align) + 1) << 26;
+  return *this;
 }
 
 Attributes::Builder &Attributes::Builder::
@@ -235,8 +218,14 @@ removeAttribute(Attributes::AttrVal Val) {
   return *this;
 }
 
-void Attributes::Builder::removeAttributes(const Attributes &A) {
+Attributes::Builder &Attributes::Builder::addAttributes(const Attributes &A) {
+  Bits |= A.Raw();
+  return *this;
+}
+
+Attributes::Builder &Attributes::Builder::removeAttributes(const Attributes &A){
   Bits &= ~A.Raw();
+  return *this;
 }
 
 bool Attributes::Builder::hasAttribute(Attributes::AttrVal A) const {
@@ -325,10 +314,6 @@ uint64_t AttributesImpl::getStackAlignment() const {
   return Bits & getAttrMask(Attributes::StackAlignment);
 }
 
-bool AttributesImpl::isEmptyOrSingleton() const {
-  return (Bits & (Bits - 1)) == 0;
-}
-
 //===----------------------------------------------------------------------===//
 // AttributeListImpl Definition
 //===----------------------------------------------------------------------===//
@@ -500,7 +485,8 @@ Attributes &AttrListPtr::getAttributesAtIndex(unsigned i) const {
   return AttrList->Attrs[i].Attrs;
 }
 
-AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
+AttrListPtr AttrListPtr::addAttr(LLVMContext &C, unsigned Idx,
+                                 Attributes Attrs) const {
   Attributes OldAttrs = getAttributes(Idx);
 #ifndef NDEBUG
   // FIXME it is not obvious how this should work for alignment.
@@ -511,8 +497,9 @@ AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
          "Attempt to change alignment!");
 #endif
   
-  Attributes NewAttrs = OldAttrs | Attrs;
-  if (NewAttrs == OldAttrs)
+  Attributes::Builder NewAttrs =
+    Attributes::Builder(OldAttrs).addAttributes(Attrs);
+  if (NewAttrs == Attributes::Builder(OldAttrs))
     return *this;
   
   SmallVector<AttributeWithIndex, 8> NewAttrList;
@@ -527,7 +514,9 @@ AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
 
     // If there are attributes already at this index, merge them in.
     if (i != e && OldAttrList[i].Index == Idx) {
-      Attrs |= OldAttrList[i].Attrs;
+      Attrs =
+        Attributes::get(C, Attributes::Builder(Attrs).
+                        addAttributes(OldAttrList[i].Attrs));
       ++i;
     }
     
@@ -541,7 +530,8 @@ AttrListPtr AttrListPtr::addAttr(unsigned Idx, Attributes Attrs) const {
   return get(NewAttrList);
 }
 
-AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const {
+AttrListPtr AttrListPtr::removeAttr(LLVMContext &C, unsigned Idx,
+                                    Attributes Attrs) const {
 #ifndef NDEBUG
   // FIXME it is not obvious how this should work for alignment.
   // For now, say we can't pass in alignment, which no current use does.
@@ -551,8 +541,9 @@ AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const {
   if (AttrList == 0) return AttrListPtr();
   
   Attributes OldAttrs = getAttributes(Idx);
-  Attributes NewAttrs = OldAttrs & ~Attrs;
-  if (NewAttrs == OldAttrs)
+  Attributes::Builder NewAttrs =
+    Attributes::Builder(OldAttrs).removeAttributes(Attrs);
+  if (NewAttrs == Attributes::Builder(OldAttrs))
     return *this;
 
   SmallVector<AttributeWithIndex, 8> NewAttrList;
@@ -565,9 +556,10 @@ AttrListPtr AttrListPtr::removeAttr(unsigned Idx, Attributes Attrs) const {
   
   // If there are attributes already at this index, merge them in.
   assert(OldAttrList[i].Index == Idx && "Attribute isn't set?");
-  Attrs = OldAttrList[i].Attrs & ~Attrs;
+  Attrs = Attributes::get(C, Attributes::Builder(OldAttrList[i].Attrs).
+                          removeAttributes(Attrs));
   ++i;
-  if (Attrs)  // If any attributes left for this parameter, add them.
+  if (Attrs.hasAttributes()) // If any attributes left for this param, add them.
     NewAttrList.push_back(AttributeWithIndex::get(Idx, Attrs));
   
   // Copy attributes for arguments after this one.
@@ -581,7 +573,7 @@ void AttrListPtr::dump() const {
   dbgs() << "PAL[ ";
   for (unsigned i = 0; i < getNumSlots(); ++i) {
     const AttributeWithIndex &PAWI = getSlot(i);
-    dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs << "} ";
+    dbgs() << "{" << PAWI.Index << "," << PAWI.Attrs.getAsString() << "} ";
   }
   
   dbgs() << "]\n";
diff --git a/include/llvm/AttributesImpl.h b/lib/VMCore/AttributesImpl.h
index eea11a7011..93001e279f 100644
--- a/include/llvm/AttributesImpl.h
+++ b/lib/VMCore/AttributesImpl.h
@@ -36,8 +36,6 @@ public:
   uint64_t getAlignment() const;
   uint64_t getStackAlignment() const;
 
-  bool isEmptyOrSingleton() const;
-
   static uint64_t getAttrMask(uint64_t Val);
 
   void Profile(FoldingSetNodeID &ID) const {
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 094ca75513..5fff460e8b 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -148,7 +148,8 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
   if (NewFn)
     F = NewFn;
   if (unsigned id = F->getIntrinsicID())
-    F->setAttributes(Intrinsic::getAttributes((Intrinsic::ID)id));
+    F->setAttributes(Intrinsic::getAttributes(F->getContext(),
+                                              (Intrinsic::ID)id));
   return Upgraded;
 }
 
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 90ecdaecf4..9eb7e10840 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1381,14 +1381,20 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
 void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
   Function *Func = unwrap<Function>(Fn);
   const AttrListPtr PAL = Func->getAttributes();
-  const AttrListPtr PALnew = PAL.addAttr(~0U, Attributes(PA));
+  Attributes::Builder B(PA);
+  const AttrListPtr PALnew =
+    PAL.addAttr(Func->getContext(), AttrListPtr::FunctionIndex,
+                Attributes::get(Func->getContext(), B));
   Func->setAttributes(PALnew);
 }
 
 void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
   Function *Func = unwrap<Function>(Fn);
   const AttrListPtr PAL = Func->getAttributes();
-  const AttrListPtr PALnew = PAL.removeAttr(~0U, Attributes(PA));
+  Attributes::Builder B(PA);
+  const AttrListPtr PALnew =
+    PAL.removeAttr(Func->getContext(), AttrListPtr::FunctionIndex,
+                   Attributes::get(Func->getContext(), B));
   Func->setAttributes(PALnew);
 }
 
@@ -1458,11 +1464,15 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
 }
 
 void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
-  unwrap<Argument>(Arg)->addAttr(Attributes(PA));
+  Argument *A = unwrap<Argument>(Arg);
+  Attributes::Builder B(PA);
+  A->addAttr(Attributes::get(A->getContext(), B));
 }
 
 void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
-  unwrap<Argument>(Arg)->removeAttr(Attributes(PA));
+  Argument *A = unwrap<Argument>(Arg);
+  Attributes::Builder B(PA);
+  A->removeAttr(Attributes::get(A->getContext(), B));
 }
 
 LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
@@ -1474,8 +1484,10 @@ LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
   
 
 void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
-  unwrap<Argument>(Arg)->addAttr(
-          Attributes::constructAlignmentFromInt(align));
+  Attributes::Builder B;
+  B.addAlignmentAttr(align);
+  unwrap<Argument>(Arg)->addAttr(Attributes::
+                                 get(unwrap<Argument>(Arg)->getContext(), B));
 }
 
 /*--.. Operations on basic blocks ..........................................--*/
@@ -1664,23 +1676,28 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
 void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index, 
                            LLVMAttribute PA) {
   CallSite Call = CallSite(unwrap<Instruction>(Instr));
+  Attributes::Builder B(PA);
   Call.setAttributes(
-    Call.getAttributes().addAttr(index, Attributes(PA)));
+    Call.getAttributes().addAttr(Call->getContext(), index,
+                                 Attributes::get(Call->getContext(), B)));
 }
 
 void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index, 
                               LLVMAttribute PA) {
   CallSite Call = CallSite(unwrap<Instruction>(Instr));
+  Attributes::Builder B(PA);
   Call.setAttributes(
-    Call.getAttributes().removeAttr(index, Attributes(PA)));
+    Call.getAttributes().removeAttr(Call->getContext(), index,
+                                    Attributes::get(Call->getContext(), B)));
 }
 
 void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, 
                                 unsigned align) {
   CallSite Call = CallSite(unwrap<Instruction>(Instr));
-  Call.setAttributes(
-    Call.getAttributes().addAttr(index, 
-        Attributes::constructAlignmentFromInt(align)));
+  Attributes::Builder B;
+  B.addAlignmentAttr(align);
+  Call.setAttributes(Call.getAttributes().addAttr(Call->getContext(), index,
+                                       Attributes::get(Call->getContext(), B)));
 }
 
 /*--.. Operations on call instructions (only) ..............................--*/
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 5c2a03ce09..9c4f2d9399 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -185,7 +185,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage,
 
   // Ensure intrinsics have the right parameter attributes.
   if (unsigned IID = getIntrinsicID())
-    setAttributes(Intrinsic::getAttributes(Intrinsic::ID(IID)));
+    setAttributes(Intrinsic::getAttributes(getContext(), Intrinsic::ID(IID)));
 
 }
 
@@ -249,13 +249,13 @@ void Function::dropAllReferences() {
 
 void Function::addAttribute(unsigned i, Attributes attr) {
   AttrListPtr PAL = getAttributes();
-  PAL = PAL.addAttr(i, attr);
+  PAL = PAL.addAttr(getContext(), i, attr);
   setAttributes(PAL);
 }
 
 void Function::removeAttribute(unsigned i, Attributes attr) {
   AttrListPtr PAL = getAttributes();
-  PAL = PAL.removeAttr(i, attr);
+  PAL = PAL.removeAttr(getContext(), i, attr);
   setAttributes(PAL);
 }
 
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 74c0c6e1d9..13c4a5d257 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -332,21 +332,22 @@ CallInst::CallInst(const CallInst &CI)
 
 void CallInst::addAttribute(unsigned i, Attributes attr) {
   AttrListPtr PAL = getAttributes();
-  PAL = PAL.addAttr(i, attr);
+  PAL = PAL.addAttr(getContext(), i, attr);
   setAttributes(PAL);
 }
 
 void CallInst::removeAttribute(unsigned i, Attributes attr) {
   AttrListPtr PAL = getAttributes();
-  PAL = PAL.removeAttr(i, attr);
+  PAL = PAL.removeAttr(getContext(), i, attr);
   setAttributes(PAL);
 }
 
 bool CallInst::hasFnAttr(Attributes::AttrVal A) const {
-  if (AttributeList.getParamAttributes(~0U).hasAttribute(A))
+  if (AttributeList.getParamAttributes(AttrListPtr::FunctionIndex)
+      .hasAttribute(A))
     return true;
   if (const Function *F = getCalledFunction())
-    return F->getParamAttributes(~0U).hasAttribute(A);
+    return F->getParamAttributes(AttrListPtr::FunctionIndex).hasAttribute(A);
   return false;
 }
 
@@ -571,10 +572,11 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
 }
 
 bool InvokeInst::hasFnAttr(Attributes::AttrVal A) const {
-  if (AttributeList.getParamAttributes(~0U).hasAttribute(A))
+  if (AttributeList.getParamAttributes(AttrListPtr::FunctionIndex).
+      hasAttribute(A))
     return true;
   if (const Function *F = getCalledFunction())
-    return F->getParamAttributes(~0U).hasAttribute(A);
+    return F->getParamAttributes(AttrListPtr::FunctionIndex).hasAttribute(A);
   return false;
 }
 
@@ -588,13 +590,13 @@ bool InvokeInst::paramHasAttr(unsigned i, Attributes::AttrVal A) const {
 
 void InvokeInst::addAttribute(unsigned i, Attributes attr) {
   AttrListPtr PAL = getAttributes();
-  PAL = PAL.addAttr(i, attr);
+  PAL = PAL.addAttr(getContext(), i, attr);
   setAttributes(PAL);
 }
 
 void InvokeInst::removeAttribute(unsigned i, Attributes attr) {
   AttrListPtr PAL = getAttributes();
-  PAL = PAL.removeAttr(i, attr);
+  PAL = PAL.removeAttr(getContext(), i, attr);
   setAttributes(PAL);
 }
 
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index a86363b632..74247bdde1 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -97,9 +97,11 @@ LLVMContextImpl::~LLVMContextImpl() {
 
   // Destroy attributes.
   for (FoldingSetIterator<AttributesImpl> I = AttrsSet.begin(),
-         E = AttrsSet.end(); I != E; ++I)
-    delete &*I;
-  
+         E = AttrsSet.end(); I != E;) {
+    FoldingSetIterator<AttributesImpl> Elem = I++;
+    delete &*Elem;
+  }
+
   // Destroy MDNodes.  ~MDNode can move and remove nodes between the MDNodeSet
   // and the NonUniquedMDNodes sets, so copy the values out first.
   SmallVector<MDNode*, 8> MDNodes;
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 524f7e54bb..ee31814c05 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -16,9 +16,9 @@
 #define LLVM_LLVMCONTEXT_IMPL_H
 
 #include "llvm/LLVMContext.h"
+#include "AttributesImpl.h"
 #include "ConstantsContext.h"
 #include "LeaksContext.h"
-#include "llvm/AttributesImpl.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Metadata.h"
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 53744b4869..da1a452d3e 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -567,9 +567,10 @@ void Verifier::VerifyParameterAttrs(Attributes Attrs, Type *Ty,
             Attrs.hasAttribute(Attributes::AlwaysInline)), "Attributes "
           "'noinline and alwaysinline' are incompatible!", V);
 
-  Attributes TypeI = Attrs & Attributes::typeIncompatible(Ty);
-  Assert1(!TypeI, "Wrong type for attribute " +
-          TypeI.getAsString(), V);
+  Assert1(!Attributes::Builder(Attrs).
+            hasAttributes(Attributes::typeIncompatible(Ty)),
+          "Wrong types for attribute: " +
+          Attributes::typeIncompatible(Ty).getAsString(), V);
 
   if (PointerType *PTy = dyn_cast<PointerType>(Ty))
     Assert1(!Attrs.hasAttribute(Attributes::ByVal) ||
@@ -617,7 +618,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT,
   Attributes::Builder NotFn(FAttrs);
   NotFn.removeFunctionOnlyAttrs();
   Assert1(!NotFn.hasAttributes(), "Attributes '" +
-          Attributes::get(NotFn).getAsString() +
+          Attributes::get(V->getContext(), NotFn).getAsString() +
           "' do not apply to the function!", V);
 
   // Check for mutually incompatible attributes.
diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
index 3e78c46238..101a91396e 100644
--- a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
+++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
@@ -1,4 +1,9 @@
 ; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s
+
+; tail call inside a function where byval argument is splitted between
+; registers and stack is currently unsupported.
+; XFAIL: *
+
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 target triple = "thumbv7-apple-ios"
 
diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll
index 6e1f17dced..238ba24a79 100644
--- a/test/CodeGen/ARM/coalesce-subregs.ll
+++ b/test/CodeGen/ARM/coalesce-subregs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=cortex-a9 -verify-coalescing | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a9 -verify-coalescing -verify-machineinstrs | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
 target triple = "thumbv7-apple-ios0.0.0"
 
@@ -214,3 +214,78 @@ loop.end:
  %d.end = phi double [ 0.0, %entry ], [ %add, %after_inner_loop ]
  ret void
 }
+
+; CHECK: pr14078
+define arm_aapcs_vfpcc i32 @pr14078(i8* nocapture %arg, i8* nocapture %arg1, i32 %arg2) nounwind uwtable readonly {
+bb:
+  br i1 undef, label %bb31, label %bb3
+
+bb3:                                              ; preds = %bb12, %bb
+  %tmp = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp4 = bitcast <1 x i64> %tmp to <2 x float>
+  %tmp5 = shufflevector <2 x float> %tmp4, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp6 = bitcast <4 x float> %tmp5 to <2 x i64>
+  %tmp7 = shufflevector <2 x i64> %tmp6, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp8 = bitcast <1 x i64> %tmp7 to <2 x float>
+  %tmp9 = tail call <2 x float> @baz(<2 x float> <float 0xFFFFFFFFE0000000, float 0.000000e+00>, <2 x float> %tmp8, <2 x float> zeroinitializer) nounwind
+  br i1 undef, label %bb10, label %bb12
+
+bb10:                                             ; preds = %bb3
+  %tmp11 = load <4 x float>* undef, align 8
+  br label %bb12
+
+bb12:                                             ; preds = %bb10, %bb3
+  %tmp13 = shufflevector <2 x float> %tmp9, <2 x float> zeroinitializer, <2 x i32> <i32 0, i32 2>
+  %tmp14 = bitcast <2 x float> %tmp13 to <1 x i64>
+  %tmp15 = shufflevector <1 x i64> %tmp14, <1 x i64> zeroinitializer, <2 x i32> <i32 0, i32 1>
+  %tmp16 = bitcast <2 x i64> %tmp15 to <4 x float>
+  %tmp17 = fmul <4 x float> zeroinitializer, %tmp16
+  %tmp18 = bitcast <4 x float> %tmp17 to <2 x i64>
+  %tmp19 = shufflevector <2 x i64> %tmp18, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp20 = bitcast <1 x i64> %tmp19 to <2 x float>
+  %tmp21 = tail call <2 x float> @baz67(<2 x float> %tmp20, <2 x float> undef) nounwind
+  %tmp22 = tail call <2 x float> @baz67(<2 x float> %tmp21, <2 x float> %tmp21) nounwind
+  %tmp23 = shufflevector <2 x float> %tmp22, <2 x float> undef, <4 x i32> zeroinitializer
+  %tmp24 = bitcast <4 x float> %tmp23 to <2 x i64>
+  %tmp25 = shufflevector <2 x i64> %tmp24, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp26 = bitcast <1 x i64> %tmp25 to <2 x float>
+  %tmp27 = extractelement <2 x float> %tmp26, i32 0
+  %tmp28 = fcmp olt float %tmp27, 0.000000e+00
+  %tmp29 = select i1 %tmp28, i32 0, i32 undef
+  %tmp30 = icmp ult i32 undef, %arg2
+  br i1 %tmp30, label %bb3, label %bb31
+
+bb31:                                             ; preds = %bb12, %bb
+  %tmp32 = phi i32 [ 1, %bb ], [ %tmp29, %bb12 ]
+  ret i32 %tmp32
+}
+
+declare <2 x float> @baz(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
+
+declare <2 x float> @baz67(<2 x float>, <2 x float>) nounwind readnone
+
+%struct.wombat.5 = type { %struct.quux, %struct.quux, %struct.quux, %struct.quux }
+%struct.quux = type { <4 x float> }
+
+; CHECK: pr14079
+define linkonce_odr arm_aapcs_vfpcc %struct.wombat.5 @pr14079(i8* nocapture %arg, i8* nocapture %arg1, i8* nocapture %arg2) nounwind uwtable inlinehint {
+bb:
+  %tmp = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> zeroinitializer
+  %tmp3 = bitcast <1 x i64> %tmp to <2 x float>
+  %tmp4 = shufflevector <2 x float> %tmp3, <2 x float> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  %tmp5 = shufflevector <2 x float> %tmp4, <2 x float> undef, <2 x i32> <i32 1, i32 3>
+  %tmp6 = bitcast <2 x float> %tmp5 to <1 x i64>
+  %tmp7 = shufflevector <1 x i64> undef, <1 x i64> %tmp6, <2 x i32> <i32 0, i32 1>
+  %tmp8 = bitcast <2 x i64> %tmp7 to <4 x float>
+  %tmp9 = shufflevector <2 x i64> zeroinitializer, <2 x i64> undef, <1 x i32> <i32 1>
+  %tmp10 = bitcast <1 x i64> %tmp9 to <2 x float>
+  %tmp11 = shufflevector <2 x float> %tmp10, <2 x float> undef, <2 x i32> <i32 0, i32 2>
+  %tmp12 = shufflevector <2 x float> %tmp11, <2 x float> undef, <2 x i32> <i32 0, i32 2>
+  %tmp13 = bitcast <2 x float> %tmp12 to <1 x i64>
+  %tmp14 = shufflevector <1 x i64> %tmp13, <1 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %tmp15 = bitcast <2 x i64> %tmp14 to <4 x float>
+  %tmp16 = insertvalue %struct.wombat.5 undef, <4 x float> %tmp8, 1, 0
+  %tmp17 = insertvalue %struct.wombat.5 %tmp16, <4 x float> %tmp15, 2, 0
+  %tmp18 = insertvalue %struct.wombat.5 %tmp17, <4 x float> undef, 3, 0
+  ret %struct.wombat.5 %tmp18
+}
diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll
index 7fbf8f4090..577f8aa7d3 100644
--- a/test/CodeGen/ARM/divmod.ll
+++ b/test/CodeGen/ARM/divmod.ll
@@ -1,10 +1,18 @@
-; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8
+; RUN: llc < %s -mtriple=arm-apple-ios5.0 -mcpu=swift     | FileCheck %s -check-prefix=SWIFT
+
+; rdar://12481395
 
 define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
 entry:
-; CHECK: foo:
-; CHECK: bl ___divmodsi4
-; CHECK-NOT: bl ___divmodsi4
+; A8: foo:
+; A8: bl ___divmodsi4
+; A8-NOT: bl ___divmodsi4
+
+; SWIFT: foo:
+; SWIFT: sdiv
+; SWIFT: mls
+; SWIFT-NOT: bl __divmodsi4
   %div = sdiv i32 %x, %y
   store i32 %div, i32* %P, align 4
   %rem = srem i32 %x, %y
@@ -15,9 +23,14 @@ entry:
 
 define void @bar(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp {
 entry:
-; CHECK: bar:
-; CHECK: bl ___udivmodsi4
-; CHECK-NOT: bl ___udivmodsi4
+; A8: bar:
+; A8: bl ___udivmodsi4
+; A8-NOT: bl ___udivmodsi4
+
+; SWIFT: bar:
+; SWIFT: udiv
+; SWIFT: mls
+; SWIFT-NOT: bl __udivmodsi4
   %div = udiv i32 %x, %y
   store i32 %div, i32* %P, align 4
   %rem = urem i32 %x, %y
@@ -32,14 +45,18 @@ entry:
 
 define void @do_indent(i32 %cols) nounwind {
 entry:
-; CHECK: do_indent:
+; A8: do_indent:
+; SWIFT: do_indent:
   %0 = load i32* @flags, align 4
   %1 = and i32 %0, 67108864
   %2 = icmp eq i32 %1, 0
   br i1 %2, label %bb1, label %bb
 
 bb:
-; CHECK: bl ___divmodsi4
+; A8: bl ___divmodsi4
+; SWIFT: sdiv
+; SWIFT: mls
+; SWIFT-NOT: bl __divmodsi4
   %3 = load i32* @tabsize, align 4
   %4 = srem i32 %cols, %3
   %5 = sdiv i32 %cols, %3
@@ -60,9 +77,14 @@ declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
 ; rdar://11714607
 define i32 @howmany(i32 %x, i32 %y) nounwind {
 entry:
-; CHECK: howmany:
-; CHECK: bl ___udivmodsi4
-; CHECK-NOT: ___udivsi3
+; A8: howmany:
+; A8: bl ___udivmodsi4
+; A8-NOT: ___udivsi3
+
+; SWIFT: howmany:
+; SWIFT: udiv
+; SWIFT: mls
+; SWIFT-NOT: bl __udivmodsi4
   %rem = urem i32 %x, %y
   %div = udiv i32 %x, %y
   %not.cmp = icmp ne i32 %rem, 0
diff --git a/test/CodeGen/ARM/struct_byval.ll b/test/CodeGen/ARM/struct_byval.ll
index 99ba475ad7..e9541c2788 100644
--- a/test/CodeGen/ARM/struct_byval.ll
+++ b/test/CodeGen/ARM/struct_byval.ll
@@ -44,3 +44,47 @@ entry:
 declare i32 @e1(%struct.SmallStruct* nocapture byval %in) nounwind
 declare i32 @e2(%struct.LargeStruct* nocapture byval %in) nounwind
 declare i32 @e3(%struct.LargeStruct* nocapture byval align 16 %in) nounwind
+
+; rdar://12442472
+; We can't do tail call since address of s is passed to the callee and part of
+; s is in caller's local frame.
+define void @f3(%struct.SmallStruct* nocapture byval %s) nounwind optsize {
+; CHECK: f3
+; CHECK: bl _consumestruct
+entry:
+  %0 = bitcast %struct.SmallStruct* %s to i8*
+  tail call void @consumestruct(i8* %0, i32 80) optsize
+  ret void
+}
+
+define void @f4(%struct.SmallStruct* nocapture byval %s) nounwind optsize {
+; CHECK: f4
+; CHECK: bl _consumestruct
+entry:
+  %addr = getelementptr inbounds %struct.SmallStruct* %s, i32 0, i32 0
+  %0 = bitcast i32* %addr to i8*
+  tail call void @consumestruct(i8* %0, i32 80) optsize
+  ret void
+}
+
+; We can do tail call here since s is in the incoming argument area.
+define void @f5(i32 %a, i32 %b, i32 %c, i32 %d, %struct.SmallStruct* nocapture byval %s) nounwind optsize {
+; CHECK: f5
+; CHECK: b _consumestruct
+entry:
+  %0 = bitcast %struct.SmallStruct* %s to i8*
+  tail call void @consumestruct(i8* %0, i32 80) optsize
+  ret void
+}
+
+define void @f6(i32 %a, i32 %b, i32 %c, i32 %d, %struct.SmallStruct* nocapture byval %s) nounwind optsize {
+; CHECK: f6
+; CHECK: b _consumestruct
+entry:
+  %addr = getelementptr inbounds %struct.SmallStruct* %s, i32 0, i32 0
+  %0 = bitcast i32* %addr to i8*
+  tail call void @consumestruct(i8* %0, i32 80) optsize
+  ret void
+}
+
+declare void @consumestruct(i8* nocapture %structp, i32 %structsize) nounwind
diff --git a/test/CodeGen/ARM/vdup.ll b/test/CodeGen/ARM/vdup.ll
index a8c224b438..2cf94d63ca 100644
--- a/test/CodeGen/ARM/vdup.ll
+++ b/test/CodeGen/ARM/vdup.ll
@@ -295,3 +295,39 @@ define <4 x i32> @tduplane(<4 x i32> %invec) {
   %4 = insertelement <4 x i32> %3, i32 255, i32 3
   ret <4 x i32> %4
 }
+
+define <2 x float> @check_f32(<4 x float> %v) nounwind {
+;CHECK: check_f32:
+;CHECK: vdup.32 {{.*}}, d{{..}}[1]
+  %x = extractelement <4 x float> %v, i32 3
+  %1 = insertelement  <2 x float> undef, float %x, i32 0
+  %2 = insertelement  <2 x float> %1, float %x, i32 1
+  ret <2 x float> %2
+}
+
+define <2 x i32> @check_i32(<4 x i32> %v) nounwind {
+;CHECK: check_i32:
+;CHECK: vdup.32 {{.*}}, d{{..}}[1]
+  %x = extractelement <4 x i32> %v, i32 3
+  %1 = insertelement  <2 x i32> undef, i32 %x, i32 0
+  %2 = insertelement  <2 x i32> %1, i32 %x, i32 1
+  ret <2 x i32> %2
+}
+
+define <4 x i16> @check_i16(<8 x i16> %v) nounwind {
+;CHECK: check_i16:
+;CHECK: vdup.16 {{.*}}, d{{..}}[3]
+  %x = extractelement <8 x i16> %v, i32 3
+  %1 = insertelement  <4 x i16> undef, i16 %x, i32 0
+  %2 = insertelement  <4 x i16> %1, i16 %x, i32 1
+  ret <4 x i16> %2
+}
+
+define <8 x i8> @check_i8(<16 x i8> %v) nounwind {
+;CHECK: check_i8:
+;CHECK: vdup.8 {{.*}}, d{{..}}[3]
+  %x = extractelement <16 x i8> %v, i32 3
+  %1 = insertelement  <8  x i8> undef, i8 %x, i32 0
+  %2 = insertelement  <8  x i8> %1, i8 %x, i32 1
+  ret <8 x i8> %2
+}
diff --git a/test/CodeGen/ARM/vselect_imax.ll b/test/CodeGen/ARM/vselect_imax.ll
new file mode 100644
index 0000000000..f5994046de
--- /dev/null
+++ b/test/CodeGen/ARM/vselect_imax.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; Make sure that ARM backend with NEON handles vselect.
+
+define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
+; CHECK: vcgt.s32 [[QR:q[0-9]+]], [[Q1:q[0-9]+]], [[Q2:q[0-9]+]]
+; CHECK: vbsl [[QR]], [[Q1]], [[Q2]]
+    %cmpres = icmp sgt <4 x i32> %a, %b
+    %maxres = select <4 x i1> %cmpres, <4 x i32> %a,  <4 x i32> %b
+    store <4 x i32> %maxres, <4 x i32>* %m
+    ret void
+}
+
diff --git a/test/CodeGen/Mips/div.ll b/test/CodeGen/Mips/div.ll
new file mode 100644
index 0000000000..00e2c19274
--- /dev/null
+++ b/test/CodeGen/Mips/div.ll
@@ -0,0 +1,18 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 100, align 4
+@jjjj = global i32 -4, align 4
+@kkkk = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @iiii, align 4
+  %1 = load i32* @jjjj, align 4
+  %div = sdiv i32 %0, %1
+; 16:	div	$zero, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: 	mflo	${{[0-9]+}}
+  store i32 %div, i32* @kkkk, align 4
+  ret void
+}
+
+
diff --git a/test/CodeGen/Mips/div_rem.ll b/test/CodeGen/Mips/div_rem.ll
new file mode 100644
index 0000000000..950192eee1
--- /dev/null
+++ b/test/CodeGen/Mips/div_rem.ll
@@ -0,0 +1,21 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 103, align 4
+@jjjj = global i32 -4, align 4
+@kkkk = common global i32 0, align 4
+@llll = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @iiii, align 4
+  %1 = load i32* @jjjj, align 4
+  %div = sdiv i32 %0, %1
+  store i32 %div, i32* @kkkk, align 4
+  %rem = srem i32 %0, %1
+; 16:	div	$zero, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: 	mflo	${{[0-9]+}}
+; 16: 	mfhi	${{[0-9]+}}
+  store i32 %rem, i32* @llll, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/Mips/divu.ll b/test/CodeGen/Mips/divu.ll
new file mode 100644
index 0000000000..b96a439390
--- /dev/null
+++ b/test/CodeGen/Mips/divu.ll
@@ -0,0 +1,18 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 100, align 4
+@jjjj = global i32 4, align 4
+@kkkk = common global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @iiii, align 4
+  %1 = load i32* @jjjj, align 4
+  %div = udiv i32 %0, %1
+; 16:	divu	$zero, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: 	mflo	${{[0-9]+}}
+  store i32 %div, i32* @kkkk, align 4
+  ret void
+}
+
+
diff --git a/test/CodeGen/Mips/divu_remu.ll b/test/CodeGen/Mips/divu_remu.ll
new file mode 100644
index 0000000000..a6c1563ac1
--- /dev/null
+++ b/test/CodeGen/Mips/divu_remu.ll
@@ -0,0 +1,23 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 103, align 4
+@jjjj = global i32 4, align 4
+@kkkk = common global i32 0, align 4
+@llll = common global i32 0, align 4
+
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @iiii, align 4
+  %1 = load i32* @jjjj, align 4
+  %div = udiv i32 %0, %1
+  store i32 %div, i32* @kkkk, align 4
+  %rem = urem i32 %0, %1
+; 16:	divu	$zero, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: 	mflo	${{[0-9]+}}
+; 16: 	mfhi	${{[0-9]+}}
+  store i32 %rem, i32* @llll, align 4
+  ret void
+}
+
+
diff --git a/test/CodeGen/Mips/rem.ll b/test/CodeGen/Mips/rem.ll
new file mode 100644
index 0000000000..b18f85dcbe
--- /dev/null
+++ b/test/CodeGen/Mips/rem.ll
@@ -0,0 +1,19 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 103, align 4
+@jjjj = global i32 -4, align 4
+@kkkk = common global i32 0, align 4
+
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @iiii, align 4
+  %1 = load i32* @jjjj, align 4
+  %rem = srem i32 %0, %1
+; 16:	div	$zero, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: 	mfhi	${{[0-9]+}}
+  store i32 %rem, i32* @kkkk, align 4
+  ret void
+}
+
+
diff --git a/test/CodeGen/Mips/remu.ll b/test/CodeGen/Mips/remu.ll
new file mode 100644
index 0000000000..472503c384
--- /dev/null
+++ b/test/CodeGen/Mips/remu.ll
@@ -0,0 +1,18 @@
+; RUN: llc  -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@iiii = global i32 103, align 4
+@jjjj = global i32 4, align 4
+@kkkk = common global i32 0, align 4
+@.str = private unnamed_addr constant [15 x i8] c"%u = %u %% %u\0A\00", align 1
+
+define void @test() nounwind {
+entry:
+  %0 = load i32* @iiii, align 4
+  %1 = load i32* @jjjj, align 4
+  %rem = urem i32 %0, %1
+; 16:	divu	$zero, ${{[0-9]+}}, ${{[0-9]+}}
+; 16: 	mfhi	${{[0-9]+}}
+  store i32 %rem, i32* @kkkk, align 4
+  ret void
+}
+
diff --git a/test/CodeGen/PowerPC/2012-10-12-bitcast.ll b/test/CodeGen/PowerPC/2012-10-12-bitcast.ll
new file mode 100644
index 0000000000..f841c5fb92
--- /dev/null
+++ b/test/CodeGen/PowerPC/2012-10-12-bitcast.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mattr=+altivec < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define i32 @test(<16 x i8> %v) nounwind {
+entry:
+  %0 = bitcast <16 x i8> %v to i128
+  %1 = lshr i128 %0, 96
+  %2 = trunc i128 %1 to i32
+  ret i32 %2
+}
+
+; Verify that bitcast handles big-endian platforms correctly
+; by checking we load the result from the correct offset
+
+; CHECK: addi [[REGISTER:[0-9]+]], 1, -16
+; CHECK: stvx 2, 0, [[REGISTER]]
+; CHECK: lwz 3, -16(1)
+; CHECK: blr
+
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index ae5804195c..3eb7b37ee6 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -477,3 +477,106 @@ for.inc:                                          ; preds = %for.cond
 }
 
 declare void @fn3(...)
+
+; Check coalescing of IMPLICIT_DEF instructions:
+;
+; %vreg1 = IMPLICIT_DEF
+; %vreg2 = MOV32r0
+;
+; When coalescing %vreg1 and %vreg2, the IMPLICIT_DEF instruction should be
+; erased along with its value number.
+;
+define void @rdar12474033() nounwind ssp {
+bb:
+  br i1 undef, label %bb21, label %bb1
+
+bb1:                                              ; preds = %bb
+  switch i32 undef, label %bb10 [
+    i32 4, label %bb2
+    i32 1, label %bb9
+    i32 5, label %bb3
+    i32 6, label %bb3
+    i32 2, label %bb9
+  ]
+
+bb2:                                              ; preds = %bb1
+  unreachable
+
+bb3:                                              ; preds = %bb1, %bb1
+  br i1 undef, label %bb4, label %bb5
+
+bb4:                                              ; preds = %bb3
+  unreachable
+
+bb5:                                              ; preds = %bb3
+  %tmp = load <4 x float>* undef, align 1
+  %tmp6 = bitcast <4 x float> %tmp to i128
+  %tmp7 = load <4 x float>* undef, align 1
+  %tmp8 = bitcast <4 x float> %tmp7 to i128
+  br label %bb10
+
+bb9:                                              ; preds = %bb1, %bb1
+  unreachable
+
+bb10:                                             ; preds = %bb5, %bb1
+  %tmp11 = phi i128 [ undef, %bb1 ], [ %tmp6, %bb5 ]
+  %tmp12 = phi i128 [ 0, %bb1 ], [ %tmp8, %bb5 ]
+  switch i32 undef, label %bb21 [
+    i32 2, label %bb18
+    i32 3, label %bb13
+    i32 5, label %bb16
+    i32 6, label %bb17
+    i32 1, label %bb18
+  ]
+
+bb13:                                             ; preds = %bb10
+  br i1 undef, label %bb15, label %bb14
+
+bb14:                                             ; preds = %bb13
+  br label %bb21
+
+bb15:                                             ; preds = %bb13
+  unreachable
+
+bb16:                                             ; preds = %bb10
+  unreachable
+
+bb17:                                             ; preds = %bb10
+  unreachable
+
+bb18:                                             ; preds = %bb10, %bb10
+  %tmp19 = bitcast i128 %tmp11 to <4 x float>
+  %tmp20 = bitcast i128 %tmp12 to <4 x float>
+  br label %bb21
+
+bb21:                                             ; preds = %bb18, %bb14, %bb10, %bb
+  %tmp22 = phi <4 x float> [ undef, %bb ], [ undef, %bb10 ], [ undef, %bb14 ], [ %tmp20, %bb18 ]
+  %tmp23 = phi <4 x float> [ undef, %bb ], [ undef, %bb10 ], [ undef, %bb14 ], [ %tmp19, %bb18 ]
+  store <4 x float> %tmp23, <4 x float>* undef, align 16
+  store <4 x float> %tmp22, <4 x float>* undef, align 16
+  switch i32 undef, label %bb29 [
+    i32 5, label %bb27
+    i32 1, label %bb24
+    i32 2, label %bb25
+    i32 14, label %bb28
+    i32 4, label %bb26
+  ]
+
+bb24:                                             ; preds = %bb21
+  unreachable
+
+bb25:                                             ; preds = %bb21
+  br label %bb29
+
+bb26:                                             ; preds = %bb21
+  br label %bb29
+
+bb27:                                             ; preds = %bb21
+  unreachable
+
+bb28:                                             ; preds = %bb21
+  br label %bb29
+
+bb29:                                             ; preds = %bb28, %bb26, %bb25, %bb21
+  unreachable
+}
diff --git a/test/CodeGen/X86/handle-move.ll b/test/CodeGen/X86/handle-move.ll
new file mode 100644
index 0000000000..e9f7a962e2
--- /dev/null
+++ b/test/CodeGen/X86/handle-move.ll
@@ -0,0 +1,74 @@
+; RUN: llc -march=x86-64 -mcpu=core2 -fast-isel -enable-misched -misched=shuffle -misched-bottomup -verify-machineinstrs < %s
+; RUN: llc -march=x86-64 -mcpu=core2 -fast-isel -enable-misched -misched=shuffle -misched-topdown -verify-machineinstrs < %s
+; REQUIRES: asserts
+;
+; Test the LiveIntervals::handleMove() function.
+;
+; Moving the DIV32r instruction exercises the regunit update code because
+; %EDX has a live range into the function and is used by the DIV32r.
+;
+; Here sinking a kill + dead def:
+; 144B -> 180B: DIV32r %vreg4, %EAX<imp-def>, %EDX<imp-def,dead>, %EFLAGS<imp-def,dead>, %EAX<imp-use,kill>, %EDX<imp-use>
+;       %vreg4: [48r,144r:0)  0@48r
+;         -->   [48r,180r:0)  0@48r
+;       DH:     [0B,16r:0)[128r,144r:2)[144r,144d:1)  0@0B-phi 1@144r 2@128r
+;         -->   [0B,16r:0)[128r,180r:2)[180r,180d:1)  0@0B-phi 1@180r 2@128r
+;       DL:     [0B,16r:0)[128r,144r:2)[144r,144d:1)  0@0B-phi 1@144r 2@128r
+;         -->   [0B,16r:0)[128r,180r:2)[180r,180d:1)  0@0B-phi 1@180r 2@128r
+;
+define i32 @f1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp {
+entry:
+  %y = add i32 %c, 1
+  %x = udiv i32 %b, %a
+  %add = add nsw i32 %y, %x
+  ret i32 %add
+}
+
+; Same as above, but moving a kill + live def:
+; 144B -> 180B: DIV32r %vreg4, %EAX<imp-def,dead>, %EDX<imp-def>, %EFLAGS<imp-def,dead>, %EAX<imp-use,kill>, %EDX<imp-use>
+;       %vreg4: [48r,144r:0)  0@48r
+;         -->   [48r,180r:0)  0@48r
+;       DH:     [0B,16r:0)[128r,144r:2)[144r,184r:1)  0@0B-phi 1@144r 2@128r
+;         -->   [0B,16r:0)[128r,180r:2)[180r,184r:1)  0@0B-phi 1@180r 2@128r
+;       DL:     [0B,16r:0)[128r,144r:2)[144r,184r:1)  0@0B-phi 1@144r 2@128r
+;         -->   [0B,16r:0)[128r,180r:2)[180r,184r:1)  0@0B-phi 1@180r 2@128r
+;
+define i32 @f2(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp {
+entry:
+  %y = sub i32 %c, %d
+  %x = urem i32 %b, %a
+  %add = add nsw i32 %x, %y
+  ret i32 %add
+}
+
+; Moving a use below the existing kill (%vreg5):
+; Moving a tied virtual register def (%vreg11):
+;
+; 96B -> 120B: %vreg11<def,tied1> = SUB32rr %vreg11<tied0>, %vreg5
+;       %vreg11:        [80r,96r:1)[96r,144r:0)  0@96r 1@80r
+;            -->        [80r,120r:1)[120r,144r:0)  0@120r 1@80r
+;       %vreg5:         [16r,112r:0)  0@16r
+;            -->        [16r,120r:0)  0@16r
+;
+define i32 @f3(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp {
+entry:
+  %y = sub i32 %a, %b
+  %x = add i32 %a, %b
+  %r = mul i32 %x, %y
+  ret i32 %r
+}
+
+; Move EFLAGS dead def across another def:
+; handleMove 208B -> 36B: %EDX<def> = MOV32r0 %EFLAGS<imp-def,dead>
+;    EFLAGS:    [20r,20d:4)[160r,160d:3)[208r,208d:0)[224r,224d:1)[272r,272d:2)[304r,304d:5)  0@208r 1@224r 2@272r 3@160r 4@20r 5@304r
+;         -->   [20r,20d:4)[36r,36d:0)[160r,160d:3)[224r,224d:1)[272r,272d:2)[304r,304d:5)  0@36r 1@224r 2@272r 3@160r 4@20r 5@304r
+;
+define i32 @f4(i32 %a, i32 %b, i32 %c, i32 %d) nounwind uwtable readnone ssp {
+entry:
+  %x = sub i32 %a, %b
+  %y = sub i32 %b, %c
+  %z = sub i32 %c, %d
+  %r1 = udiv i32 %x, %y
+  %r2 = mul i32 %z, %r1
+  ret i32 %r2
+}
diff --git a/test/CodeGen/X86/misched-ilp.ll b/test/CodeGen/X86/misched-ilp.ll
new file mode 100644
index 0000000000..c6cedb7be8
--- /dev/null
+++ b/test/CodeGen/X86/misched-ilp.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=core2 -enable-misched -misched=ilpmax | FileCheck -check-prefix=MAX %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mcpu=core2 -enable-misched -misched=ilpmin | FileCheck -check-prefix=MIN %s
+;
+; Basic verification of the ScheduleDAGILP metric.
+;
+; MAX: addss
+; MAX: addss
+; MAX: addss
+; MAX: subss
+; MAX: addss
+;
+; MIN: addss
+; MIN: addss
+; MIN: subss
+; MIN: addss
+; MIN: addss
+define float @ilpsched(float %a, float %b, float %c, float %d, float %e, float %f) nounwind uwtable readnone ssp {
+entry:
+  %add = fadd float %a, %b
+  %add1 = fadd float %c, %d
+  %add2 = fadd float %e, %f
+  %add3 = fsub float %add1, %add2
+  %add4 = fadd float %add, %add3
+  ret float %add4
+}
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 2e39473057..3bec3acdbf 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -344,3 +344,16 @@ entry:
 ; ATOM: negw
 ; ATOM: sbbw
 }
+
+define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind {
+  %cmp = icmp slt i32 %x, 15
+  %sel = select i1 %cmp, i8 %a, i8 %b
+  ret i8 %sel
+; CHECK: test18:
+; CHECK: cmpl $15, %edi
+; CHECK: cmovgel %edx
+
+; ATOM: test18:
+; ATOM: cmpl $15, %edi
+; ATOM: cmovgel %edx
+}
diff --git a/test/Instrumentation/AddressSanitizer/instrument_global.ll b/test/Instrumentation/AddressSanitizer/instrument_global.ll
index ba8d65a4fa..3d92946087 100644
--- a/test/Instrumentation/AddressSanitizer/instrument_global.ll
+++ b/test/Instrumentation/AddressSanitizer/instrument_global.ll
@@ -6,8 +6,8 @@ target triple = "x86_64-unknown-linux-gnu"
 ; If a global is present, __asan_[un]register_globals should be called from
 ; module ctor/dtor
 
-; CHECK: llvm.global_dtors
 ; CHECK: llvm.global_ctors
+; CHECK: llvm.global_dtors
 
 ; CHECK: define internal void @asan.module_ctor
 ; CHECK-NOT: ret
diff --git a/test/MC/X86/x86_nop.s b/test/MC/X86/x86_nop.s
index de0fc08834..396e3022eb 100644
--- a/test/MC/X86/x86_nop.s
+++ b/test/MC/X86/x86_nop.s
@@ -1,7 +1,13 @@
-# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=geode %s -o %t
-# RUN: llvm-objdump -disassemble %t | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=generic %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i386 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i486 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i586 %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=pentium %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=pentium-mmx %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=geode %s | llvm-objdump -d - | FileCheck %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -mcpu=i686 %s | llvm-objdump -d - | not FileCheck %s
 
-# CHECK-NOT: nopw
+# CHECK-NOT: nop{{[lw]}}
 inc %eax
 .align 8
 inc %eax
diff --git a/test/Transforms/InstCombine/strcat-1.ll b/test/Transforms/InstCombine/strcat-1.ll
new file mode 100644
index 0000000000..3c05d6b06f
--- /dev/null
+++ b/test/Transforms/InstCombine/strcat-1.ll
@@ -0,0 +1,38 @@
+; Test that the strcat libcall simplifier works correctly per the
+; bug found in PR3661.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+
+declare i8* @strcat(i8*, i8*)
+declare i32 @puts(i8*)
+
+define i32 @main() {
+; CHECK: @main
+; CHECK-NOT: call i8* @strcat
+; CHECK: call i32 @puts
+
+  %target = alloca [1024 x i8]
+  %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
+  store i8 0, i8* %arg1
+
+  ; rslt1 = strcat(target, "hello\00")
+  %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  %rslt1 = call i8* @strcat(i8* %arg1, i8* %arg2)
+
+  ; rslt2 = strcat(rslt1, "\00")
+  %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0
+  %rslt2 = call i8* @strcat(i8* %rslt1, i8* %arg3)
+
+  ; rslt3 = strcat(rslt2, "\00hello\00")
+  %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+  %rslt3 = call i8* @strcat(i8* %rslt2, i8* %arg4)
+
+  call i32 @puts( i8* %rslt3 )
+  ret i32 0
+}
diff --git a/test/Transforms/InstCombine/strcat-2.ll b/test/Transforms/InstCombine/strcat-2.ll
new file mode 100644
index 0000000000..379ee74953
--- /dev/null
+++ b/test/Transforms/InstCombine/strcat-2.ll
@@ -0,0 +1,32 @@
+; Test that the strcat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strcat(i8*, i8*)
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+; CHECK-NOT: call i8* @strcat
+; CHECK: ret void
+
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  call i8* @strcat(i8* %dst, i8* %src)
+  ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+; CHECK-NEXT: ret void
+
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [1 x i8]* @empty, i32 0, i32 0
+  call i8* @strcat(i8* %dst, i8* %src)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/strcat-3.ll b/test/Transforms/InstCombine/strcat-3.ll
new file mode 100644
index 0000000000..15aff2f1aa
--- /dev/null
+++ b/test/Transforms/InstCombine/strcat-3.ll
@@ -0,0 +1,22 @@
+; Test that the strcat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strcat(i8*, i8*)
+
+define void @test_nosimplify1() {
+; CHECK: @test_nosimplify1
+; CHECK: call i16* @strcat
+; CHECK: ret void
+
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  call i16* @strcat(i8* %dst, i8* %src)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/strchr-1.ll b/test/Transforms/InstCombine/strchr-1.ll
new file mode 100644
index 0000000000..5efab9ec4b
--- /dev/null
+++ b/test/Transforms/InstCombine/strchr-1.ll
@@ -0,0 +1,54 @@
+; Test that the strchr library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@null = constant [1 x i8] zeroinitializer
+@chp = global i8* zeroinitializer
+
+declare i8* @strchr(i8*, i32)
+
+define void @test_simplify1() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 6)
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+  %str = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strchr(i8* %str, i32 119)
+  store i8* %dst, i8** @chp
+  ret void
+}
+
+define void @test_simplify2() {
+; CHECK: store i8* null, i8** @chp, align 4
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+  %str = getelementptr [1 x i8]* @null, i32 0, i32 0
+  %dst = call i8* @strchr(i8* %str, i32 119)
+  store i8* %dst, i8** @chp
+  ret void
+}
+
+define void @test_simplify3() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+  %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strchr(i8* %src, i32 0)
+  store i8* %dst, i8** @chp
+  ret void
+}
+
+define void @test_simplify4(i32 %chr) {
+; CHECK: call i8* @memchr
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+  %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strchr(i8* %src, i32 %chr)
+  store i8* %dst, i8** @chp
+  ret void
+}
diff --git a/test/Transforms/InstCombine/strchr-2.ll b/test/Transforms/InstCombine/strchr-2.ll
new file mode 100644
index 0000000000..35bbd23e6d
--- /dev/null
+++ b/test/Transforms/InstCombine/strchr-2.ll
@@ -0,0 +1,21 @@
+; Test that the strchr libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@chr = global i8 zeroinitializer
+
+declare i8 @strchr(i8*, i32)
+
+define void @test_nosimplify1() {
+; CHECK: test_nosimplify1
+; CHECK: call i8 @strchr
+; CHECK: ret void
+
+  %str = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8 @strchr(i8* %str, i32 119)
+  store i8 %dst, i8* @chr
+  ret void
+}
diff --git a/test/Transforms/InstCombine/strcmp-1.ll b/test/Transforms/InstCombine/strcmp-1.ll
new file mode 100644
index 0000000000..0679246e09
--- /dev/null
+++ b/test/Transforms/InstCombine/strcmp-1.ll
@@ -0,0 +1,82 @@
+; Test that the strcmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+@bell = constant [5 x i8] c"bell\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i32 @strcmp(i8*, i8*)
+
+; strcmp("", x) -> -*x
+define i32 @test1(i8* %str2) {
+; CHECK: @test1
+; CHECK: %strcmpload = load i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: %2 = sub i32 0, %1
+; CHECK: ret i32 %2
+
+  %str1 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+  %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp1
+
+}
+
+; strcmp(x, "") -> *x
+define i32 @test2(i8* %str1) {
+; CHECK: @test2
+; CHECK: %strcmpload = load i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: ret i32 %1
+
+  %str2 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+  %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp1
+}
+
+; strcmp(x, y)  -> cnst
+define i32 @test3() {
+; CHECK: @test3
+; CHECK: ret i32 -1
+
+  %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+  %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+  %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp1
+}
+
+define i32 @test4() {
+; CHECK: @test4
+; CHECK: ret i32 1
+
+  %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+  %str2 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+  %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp1
+}
+
+; strcmp(x, y)   -> memcmp(x, y, <known length>)
+; (This transform is rather difficult to trigger in a useful manner)
+define i32 @test5(i1 %b) {
+; CHECK: @test5
+; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
+; CHECK: ret i32 %memcmp
+
+  %str1 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+  %temp1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+  %temp2 = getelementptr inbounds [5 x i8]* @bell, i32 0, i32 0
+  %str2 = select i1 %b, i8* %temp1, i8* %temp2
+  %temp3 = call i32 @strcmp(i8* %str1, i8* %str2)
+  ret i32 %temp3
+}
+
+; strcmp(x,x)  -> 0
+define i32 @test6(i8* %str) {
+; CHECK: @test6
+; CHECK: ret i32 0
+
+  %temp1 = call i32 @strcmp(i8* %str, i8* %str)
+  ret i32 %temp1
+}
diff --git a/test/Transforms/InstCombine/strcmp-2.ll b/test/Transforms/InstCombine/strcmp-2.ll
new file mode 100644
index 0000000000..20518960f3
--- /dev/null
+++ b/test/Transforms/InstCombine/strcmp-2.ll
@@ -0,0 +1,20 @@
+; Test that the strcmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+
+declare i16 @strcmp(i8*, i8*)
+
+define i16 @test_nosimplify() {
+; CHECK: @test_nosimplify
+; CHECK: call i16 @strcmp
+; CHECK: ret i16 %temp1
+
+  %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+  %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+  %temp1 = call i16 @strcmp(i8* %str1, i8* %str2)
+  ret i16 %temp1
+}
diff --git a/test/Transforms/InstCombine/strncat-1.ll b/test/Transforms/InstCombine/strncat-1.ll
new file mode 100644
index 0000000000..ad2a18b146
--- /dev/null
+++ b/test/Transforms/InstCombine/strncat-1.ll
@@ -0,0 +1,37 @@
+; Test that the strncat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+
+declare i8* @strncat(i8*, i8*, i32)
+declare i32 @puts(i8*)
+
+define i32 @main() {
+; CHECK: @main
+; CHECK-NOT: call i8* @strncat
+; CHECK: call i32 @puts
+
+  %target = alloca [1024 x i8]
+  %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
+  store i8 0, i8* %arg1
+
+  ; rslt1 = strncat(target, "hello\00")
+  %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  %rslt1 = call i8* @strncat(i8* %arg1, i8* %arg2, i32 6)
+
+  ; rslt2 = strncat(rslt1, "\00")
+  %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0
+  %rslt2 = call i8* @strncat(i8* %rslt1, i8* %arg3, i32 42)
+
+  ; rslt3 = strncat(rslt2, "\00hello\00")
+  %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+  %rslt3 = call i8* @strncat(i8* %rslt2, i8* %arg4, i32 42)
+
+  call i32 @puts(i8* %rslt3)
+  ret i32 0
+}
diff --git a/test/Transforms/InstCombine/strncat-2.ll b/test/Transforms/InstCombine/strncat-2.ll
new file mode 100644
index 0000000000..c56deacd39
--- /dev/null
+++ b/test/Transforms/InstCombine/strncat-2.ll
@@ -0,0 +1,53 @@
+; Test that the strncat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strncat(i8*, i8*, i32)
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+; CHECK-NOT: call i8* @strncat
+; CHECK: ret void
+
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  call i8* @strncat(i8* %dst, i8* %src, i32 13)
+  ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+; CHECK-NEXT: ret void
+
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [1 x i8]* @empty, i32 0, i32 0
+  call i8* @strncat(i8* %dst, i8* %src, i32 13)
+  ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+; CHECK-NEXT: ret void
+
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  call i8* @strncat(i8* %dst, i8* %src, i32 0)
+  ret void
+}
+
+define void @test_nosimplify1() {
+; CHECK: @test_nosimplify1
+; CHECK: call i8* @strncat
+; CHECK: ret void
+
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  call i8* @strncat(i8* %dst, i8* %src, i32 1)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/strncat-3.ll b/test/Transforms/InstCombine/strncat-3.ll
new file mode 100644
index 0000000000..3cd7971687
--- /dev/null
+++ b/test/Transforms/InstCombine/strncat-3.ll
@@ -0,0 +1,22 @@
+; Test that the strncat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strncat(i8*, i8*, i32)
+
+define void @test_nosimplify1() {
+; CHECK: @test_nosimplify1
+; CHECK: call i16* @strncat
+; CHECK: ret void
+
+  %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+  %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+  call i16* @strncat(i8* %dst, i8* %src, i32 13)
+  ret void
+}
diff --git a/test/Transforms/InstCombine/strncmp-1.ll b/test/Transforms/InstCombine/strncmp-1.ll
new file mode 100644
index 0000000000..48b26d1a5f
--- /dev/null
+++ b/test/Transforms/InstCombine/strncmp-1.ll
@@ -0,0 +1,97 @@
+; Test that the strncmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+@bell = constant [5 x i8] c"bell\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i32 @strncmp(i8*, i8*, i32)
+
+; strncmp("", x, n) -> -*x
+define i32 @test1(i8* %str2) {
+; CHECK: @test1
+; CHECK: %strcmpload = load i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: %2 = sub i32 0, %1
+; CHECK: ret i32 %2
+
+  %str1 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+  %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+  ret i32 %temp1
+}
+
+; strncmp(x, "", n) -> *x
+define i32 @test2(i8* %str1) {
+; CHECK: @test2
+; CHECK: %strcmpload = load i8* %str1
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: ret i32 %1
+
+  %str2 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+  %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+  ret i32 %temp1
+}
+
+; strncmp(x, y, n)  -> cnst
+define i32 @test3() {
+; CHECK: @test3
+; CHECK: ret i32 -1
+
+  %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+  %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+  %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+  ret i32 %temp1
+}
+
+define i32 @test4() {
+; CHECK: @test4
+; CHECK: ret i32 1
+
+  %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+  %str2 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+  %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+  ret i32 %temp1
+}
+
+define i32 @test5() {
+; CHECK: @test5
+; CHECK: ret i32 0
+
+  %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+  %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+  %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 4)
+  ret i32 %temp1
+}
+
+; strncmp(x,y,1) -> memcmp(x,y,1)
+; TODO: Once the memcmp simplifier gets moved into the instcombine pass
+; the following memcmp will be folded into two loads and a subtract.
+define i32 @test6(i8* %str1, i8* %str2) {
+; CHECK: @test6
+; CHECK: call i32 @memcmp
+; CHECK: ret i32 %memcmp
+
+  %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 1)
+  ret i32 %temp1
+}
+
+; strncmp(x,y,0)   -> 0
+define i32 @test7(i8* %str1, i8* %str2) {
+; CHECK: @test7
+; CHECK: ret i32 0
+
+  %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 0)
+  ret i32 %temp1
+}
+
+; strncmp(x,x,n)  -> 0
+define i32 @test8(i8* %str, i32 %n) {
+; CHECK: @test8
+; CHECK: ret i32 0
+
+  %temp1 = call i32 @strncmp(i8* %str, i8* %str, i32 %n)
+  ret i32 %temp1
+}
diff --git a/test/Transforms/InstCombine/strncmp-2.ll b/test/Transforms/InstCombine/strncmp-2.ll
new file mode 100644
index 0000000000..3fc43a6fd4
--- /dev/null
+++ b/test/Transforms/InstCombine/strncmp-2.ll
@@ -0,0 +1,20 @@
+; Test that the strncmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+
+declare i16 @strncmp(i8*, i8*, i32)
+
+define i16 @test_nosimplify() {
+; CHECK: @test_nosimplify
+; CHECK: call i16 @strncmp
+; CHECK: ret i16 %temp1
+
+  %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+  %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+  %temp1 = call i16 @strncmp(i8* %str1, i8* %str2, i32 10)
+  ret i16 %temp1
+}
diff --git a/test/Transforms/InstCombine/strrchr-1.ll b/test/Transforms/InstCombine/strrchr-1.ll
new file mode 100644
index 0000000000..854ce45bff
--- /dev/null
+++ b/test/Transforms/InstCombine/strrchr-1.ll
@@ -0,0 +1,54 @@
+; Test that the strrchr library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@null = constant [1 x i8] zeroinitializer
+@chp = global i8* zeroinitializer
+
+declare i8* @strrchr(i8*, i32)
+
+define void @test_simplify1() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 6)
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+  %str = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strrchr(i8* %str, i32 119)
+  store i8* %dst, i8** @chp
+  ret void
+}
+
+define void @test_simplify2() {
+; CHECK: store i8* null, i8** @chp, align 4
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+  %str = getelementptr [1 x i8]* @null, i32 0, i32 0
+  %dst = call i8* @strrchr(i8* %str, i32 119)
+  store i8* %dst, i8** @chp
+  ret void
+}
+
+define void @test_simplify3() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+  %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strrchr(i8* %src, i32 0)
+  store i8* %dst, i8** @chp
+  ret void
+}
+
+define void @test_nosimplify1(i32 %chr) {
+; CHECK: @test_nosimplify1
+; CHECK: call i8* @strrchr
+; CHECK: ret void
+
+  %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8* @strrchr(i8* %src, i32 %chr)
+  store i8* %dst, i8** @chp
+  ret void
+}
diff --git a/test/Transforms/InstCombine/strrchr-2.ll b/test/Transforms/InstCombine/strrchr-2.ll
new file mode 100644
index 0000000000..1974f6ca60
--- /dev/null
+++ b/test/Transforms/InstCombine/strrchr-2.ll
@@ -0,0 +1,21 @@
+; Test that the strrchr libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@chr = global i8 zeroinitializer
+
+declare i8 @strrchr(i8*, i32)
+
+define void @test_nosimplify1() {
+; CHECK: test_nosimplify1
+; CHECK: call i8 @strrchr
+; CHECK: ret void
+
+  %str = getelementptr [14 x i8]* @hello, i32 0, i32 0
+  %dst = call i8 @strrchr(i8* %str, i32 119)
+  store i8 %dst, i8* @chr
+  ret void
+}
diff --git a/test/Transforms/InstCombine/weak-symbols.ll b/test/Transforms/InstCombine/weak-symbols.ll
new file mode 100644
index 0000000000..0039b5962f
--- /dev/null
+++ b/test/Transforms/InstCombine/weak-symbols.ll
@@ -0,0 +1,33 @@
+; PR4738 - Test that the library call simplifier doesn't assume anything about
+; weak symbols.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@real_init = weak_odr constant [2 x i8] c"y\00"
+@fake_init = weak constant [2 x i8] c"y\00"
+@.str = private constant [2 x i8] c"y\00"
+
+define i32 @foo() nounwind {
+; CHECK: define i32 @foo
+; CHECK: call i32 @strcmp
+; CHECK: ret i32 %temp1
+
+entry:
+  %str1 = getelementptr inbounds [2 x i8]* @fake_init, i64 0, i64 0
+  %str2 = getelementptr inbounds [2 x i8]* @.str, i64 0, i64 0
+  %temp1 = call i32 @strcmp(i8* %str1, i8* %str2) nounwind readonly
+  ret i32 %temp1
+}
+
+define i32 @bar() nounwind {
+; CHECK: define i32 @bar
+; CHECK: ret i32 0
+
+entry:
+  %str1 = getelementptr inbounds [2 x i8]* @real_init, i64 0, i64 0
+  %str2 = getelementptr inbounds [2 x i8]* @.str, i64 0, i64 0
+  %temp1 = call i32 @strcmp(i8* %str1, i8* %str2) nounwind readonly
+  ret i32 %temp1
+}
+
+declare i32 @strcmp(i8*, i8*) nounwind readonly
diff --git a/test/Transforms/SROA/alignment.ll b/test/Transforms/SROA/alignment.ll
index 945ad91002..ad5fb6c4a5 100644
--- a/test/Transforms/SROA/alignment.ll
+++ b/test/Transforms/SROA/alignment.ll
@@ -84,37 +84,6 @@ entry:
   ret void
 }
 
-%struct.S = type { i8, { i64 } }
-
-define void @test4() {
-; This test case triggered very strange alignment behavior with memcpy due to
-; strange splitting. Reported by Duncan.
-; CHECK: @test4
-
-entry:
-  %D.2113 = alloca %struct.S
-  %Op = alloca %struct.S
-  %D.2114 = alloca %struct.S
-  %gep1 = getelementptr inbounds %struct.S* %Op, i32 0, i32 0
-  store i8 0, i8* %gep1, align 8
-  %gep2 = getelementptr inbounds %struct.S* %Op, i32 0, i32 1, i32 0
-  %cast = bitcast i64* %gep2 to double*
-  store double 0.000000e+00, double* %cast, align 8
-  store i64 0, i64* %gep2, align 8
-  %dst1 = bitcast %struct.S* %D.2114 to i8*
-  %src1 = bitcast %struct.S* %Op to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst1, i8* %src1, i32 16, i32 8, i1 false)
-  %dst2 = bitcast %struct.S* %D.2113 to i8*
-  %src2 = bitcast %struct.S* %D.2114 to i8*
-  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst2, i8* %src2, i32 16, i32 8, i1 false)
-; We get 3 memcpy calls with various reasons to shrink their alignment to 1.
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 3, i32 1, i1 false)
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 8, i32 1, i1 false)
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 11, i32 1, i1 false)
-
-  ret void
-}
-
 define void @test5() {
 ; Test that we preserve underaligned loads and stores when splitting.
 ; CHECK: @test5
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index e7767ef5e9..644fda167d 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -409,8 +409,11 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
 
 define i16 @test5() {
 ; CHECK: @test5
-; CHECK: alloca float
-; CHECK: ret i16 %
+; CHECK-NOT: alloca float
+; CHECK:      %[[cast:.*]] = bitcast float 0.0{{.*}} to i32
+; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16
+; CHECK-NEXT: %[[trunc:.*]] = trunc i32 %[[shr]] to i16
+; CHECK-NEXT: ret i16 %[[trunc]]
 
 entry:
   %a = alloca [4 x i8]
@@ -968,3 +971,95 @@ entry:
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i32 0, i1 false)
   ret void
 }
+
+define i32 @test22(i32 %x) {
+; Test that SROA and promotion is not confused by a grab bax mixture of pointer
+; types involving wrapper aggregates and zero-length aggregate members.
+; CHECK: @test22
+
+entry:
+  %a1 = alloca { { [1 x { i32 }] } }
+  %a2 = alloca { {}, { float }, [0 x i8] }
+  %a3 = alloca { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }
+; CHECK-NOT: alloca
+
+  %wrap1 = insertvalue [1 x { i32 }] undef, i32 %x, 0, 0
+  %gep1 = getelementptr { { [1 x { i32 }] } }* %a1, i32 0, i32 0, i32 0
+  store [1 x { i32 }] %wrap1, [1 x { i32 }]* %gep1
+
+  %gep2 = getelementptr { { [1 x { i32 }] } }* %a1, i32 0, i32 0
+  %ptrcast1 = bitcast { [1 x { i32 }] }* %gep2 to { [1 x { float }] }*
+  %load1 = load { [1 x { float }] }* %ptrcast1
+  %unwrap1 = extractvalue { [1 x { float }] } %load1, 0, 0
+
+  %wrap2 = insertvalue { {}, { float }, [0 x i8] } undef, { float } %unwrap1, 1
+  store { {}, { float }, [0 x i8] } %wrap2, { {}, { float }, [0 x i8] }* %a2
+
+  %gep3 = getelementptr { {}, { float }, [0 x i8] }* %a2, i32 0, i32 1, i32 0
+  %ptrcast2 = bitcast float* %gep3 to <4 x i8>*
+  %load3 = load <4 x i8>* %ptrcast2
+  %valcast1 = bitcast <4 x i8> %load3 to i32
+
+  %wrap3 = insertvalue [1 x [1 x i32]] undef, i32 %valcast1, 0, 0
+  %wrap4 = insertvalue { [1 x [1 x i32]], {} } undef, [1 x [1 x i32]] %wrap3, 0
+  %gep4 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1
+  %ptrcast3 = bitcast { [0 x double], [1 x [1 x <4 x i8>]], {} }* %gep4 to { [1 x [1 x i32]], {} }*
+  store { [1 x [1 x i32]], {} } %wrap4, { [1 x [1 x i32]], {} }* %ptrcast3
+
+  %gep5 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1, i32 1, i32 0
+  %ptrcast4 = bitcast [1 x <4 x i8>]* %gep5 to { {}, float, {} }*
+  %load4 = load { {}, float, {} }* %ptrcast4
+  %unwrap2 = extractvalue { {}, float, {} } %load4, 1
+  %valcast2 = bitcast float %unwrap2 to i32
+
+  ret i32 %valcast2
+; CHECK: ret i32
+}
+
+define void @PR14059.1(double* %d) {
+; In PR14059 a peculiar construct was identified as something that is used
+; pervasively in ARM's ABI-calling-convention lowering: the passing of a struct
+; of doubles via an array of i32 in order to place the data into integer
+; registers. This in turn was missed as an optimization by SROA due to the
+; partial loads and stores of integers to the double alloca we were trying to
+; form and promote. The solution is to widen the integer operations to be
+; whole-alloca operations, and perform the appropriate bitcasting on the
+; *values* rather than the pointers. When this works, partial reads and writes
+; via integers can be promoted away.
+; CHECK: @PR14059.1
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+entry:
+  %X.sroa.0.i = alloca double, align 8
+  %0 = bitcast double* %X.sroa.0.i to i8*
+  call void @llvm.lifetime.start(i64 -1, i8* %0)
+
+  ; Store to the low 32-bits...
+  %X.sroa.0.0.cast2.i = bitcast double* %X.sroa.0.i to i32*
+  store i32 0, i32* %X.sroa.0.0.cast2.i, align 8
+
+  ; Also use a memset to the middle 32-bits for fun.
+  %X.sroa.0.2.raw_idx2.i = getelementptr inbounds i8* %0, i32 2
+  call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i32 1, i1 false)
+
+  ; Or a memset of the whole thing.
+  call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 1, i1 false)
+
+  ; Write to the high 32-bits with a memcpy.
+  %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8* %0, i32 4
+  %d.raw = bitcast double* %d to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i32 1, i1 false)
+
+  ; Store to the high 32-bits...
+  %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
+  store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4
+
+  ; Do the actual math...
+  %X.sroa.0.0.load1.i = load double* %X.sroa.0.i, align 8
+  %accum.real.i = load double* %d, align 8
+  %add.r.i = fadd double %accum.real.i, %X.sroa.0.0.load1.i
+  store double %add.r.i, double* %d, align 8
+  call void @llvm.lifetime.end(i64 -1, i8* %0)
+  ret void
+}
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
index 2b0724c7fd..d95e48f303 100644
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -256,17 +256,17 @@ entry:
   ret i32 %loaded
 }
 
-define i32 @test10(i32 %b, i32* %ptr) {
+define float @test10(i32 %b, float* %ptr) {
 ; Don't try to promote allocas which are not elligible for it even after
 ; rewriting due to the necessity of inserting bitcasts when speculating a PHI
 ; node.
 ; CHECK: @test10
 ; CHECK: %[[alloca:.*]] = alloca
-; CHECK: %[[argvalue:.*]] = load i32* %ptr
-; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to i32*
-; CHECK: %[[allocavalue:.*]] = load i32* %[[cast]]
-; CHECK: %[[result:.*]] = phi i32 [ %[[allocavalue]], %else ], [ %[[argvalue]], %then ]
-; CHECK-NEXT: ret i32 %[[result]]
+; CHECK: %[[argvalue:.*]] = load float* %ptr
+; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float*
+; CHECK: %[[allocavalue:.*]] = load float* %[[cast]]
+; CHECK: %[[result:.*]] = phi float [ %[[allocavalue]], %else ], [ %[[argvalue]], %then ]
+; CHECK-NEXT: ret float %[[result]]
 
 entry:
   %f = alloca double
@@ -278,34 +278,34 @@ then:
   br label %exit
 
 else:
-  %bitcast = bitcast double* %f to i32*
+  %bitcast = bitcast double* %f to float*
   br label %exit
 
 exit:
-  %phi = phi i32* [ %bitcast, %else ], [ %ptr, %then ]
-  %loaded = load i32* %phi, align 4
-  ret i32 %loaded
+  %phi = phi float* [ %bitcast, %else ], [ %ptr, %then ]
+  %loaded = load float* %phi, align 4
+  ret float %loaded
 }
 
-define i32 @test11(i32 %b, i32* %ptr) {
+define float @test11(i32 %b, float* %ptr) {
 ; Same as @test10 but for a select rather than a PHI node.
 ; CHECK: @test11
 ; CHECK: %[[alloca:.*]] = alloca
-; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to i32*
-; CHECK: %[[allocavalue:.*]] = load i32* %[[cast]]
-; CHECK: %[[argvalue:.*]] = load i32* %ptr
-; CHECK: %[[result:.*]] = select i1 %{{.*}}, i32 %[[allocavalue]], i32 %[[argvalue]]
-; CHECK-NEXT: ret i32 %[[result]]
+; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float*
+; CHECK: %[[allocavalue:.*]] = load float* %[[cast]]
+; CHECK: %[[argvalue:.*]] = load float* %ptr
+; CHECK: %[[result:.*]] = select i1 %{{.*}}, float %[[allocavalue]], float %[[argvalue]]
+; CHECK-NEXT: ret float %[[result]]
 
 entry:
   %f = alloca double
   store double 0.0, double* %f
-  store i32 0, i32* %ptr
+  store float 0.0, float* %ptr
   %test = icmp ne i32 %b, 0
-  %bitcast = bitcast double* %f to i32*
-  %select = select i1 %test, i32* %bitcast, i32* %ptr
-  %loaded = load i32* %select, align 4
-  ret i32 %loaded
+  %bitcast = bitcast double* %f to float*
+  %select = select i1 %test, float* %bitcast, float* %ptr
+  %loaded = load float* %select, align 4
+  ret float %loaded
 }
 
 define i32 @test12(i32 %x, i32* %p) {
diff --git a/test/Transforms/SimplifyLibCalls/StrCat.ll b/test/Transforms/SimplifyLibCalls/StrCat.ll
deleted file mode 100644
index 3ea691a3cf..0000000000
--- a/test/Transforms/SimplifyLibCalls/StrCat.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; Test that the StrCatOptimizer works correctly
-; PR3661
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep "call.*strcat"
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   grep "puts.*%arg1"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [6 x i8] c"hello\00"		; <[6 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
-@null_hello = constant [7 x i8] c"\00hello\00"		; <[7 x i8]*> [#uses=1]
-
-declare i8* @strcat(i8*, i8*)
-
-declare i32 @puts(i8*)
-
-define i32 @main() {
-	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
-	%arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=2]
-	store i8 0, i8* %arg1
-	%arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0		; <i8*> [#uses=1]
-	%rslt1 = call i8* @strcat( i8* %arg1, i8* %arg2 )		; <i8*> [#uses=1]
-	%arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0		; <i8*> [#uses=1]
-	%rslt2 = call i8* @strcat( i8* %rslt1, i8* %arg3 )		; <i8*> [#uses=1]
-	%arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0		; <i8*> [#uses=1]
-	%rslt3 = call i8* @strcat( i8* %rslt2, i8* %arg4 )		; <i8*> [#uses=1]
-	call i32 @puts( i8* %rslt3 )		; <i32>:1 [#uses=0]
-	ret i32 0
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/StrChr.ll b/test/Transforms/SimplifyLibCalls/StrChr.ll
deleted file mode 100644
index eaabeb2feb..0000000000
--- a/test/Transforms/SimplifyLibCalls/StrChr.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; Test that the StrChrOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [14 x i8] c"hello world\5Cn\00"
-@null = constant [1 x i8] zeroinitializer
-
-declare i8* @strchr(i8*, i32)
-
-define i32 @foo(i32 %index) {
-	%hello_p = getelementptr [14 x i8]* @hello, i32 0, i32 0
-	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
-	%world = call i8* @strchr(i8* %hello_p, i32 119)
-; CHECK: getelementptr i8* %hello_p, i64 6
-	%ignore = call i8* @strchr(i8* %null_p, i32 119)
-; CHECK-NOT: call i8* strchr
-	%null = call i8* @strchr(i8* %hello_p, i32 0)
-; CHECK: getelementptr i8* %hello_p, i64 13
-	%result = call i8* @strchr(i8* %hello_p, i32 %index)
-; CHECK: call i8* @memchr(i8* %hello_p, i32 %index, i64 14)
-	ret i32 %index
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/StrCmp.ll b/test/Transforms/SimplifyLibCalls/StrCmp.ll
deleted file mode 100644
index 60854d76c9..0000000000
--- a/test/Transforms/SimplifyLibCalls/StrCmp.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; Test that the StrCmpOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-
-@hello = constant [6 x i8] c"hello\00"		; <[6 x i8]*> [#uses=1]
-@hell = constant [5 x i8] c"hell\00"		; <[5 x i8]*> [#uses=1]
-@bell = constant [5 x i8] c"bell\00"		; <[5 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
-
-declare i32 @strcmp(i8*, i8*)
-
-; strcmp("", x) -> -*x
-define i32 @test1(i8* %str) {
-  %temp1 = call i32 @strcmp(i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0), i8* %str)
-  ret i32 %temp1
-  ; CHECK: @test1
-  ; CHECK: %strcmpload = load i8* %str
-  ; CHECK: %1 = zext i8 %strcmpload to i32
-  ; CHECK: %temp1 = sub i32 0, %1
-  ; CHECK: ret i32 %temp1
-}
-
-; strcmp(x, "") -> *x
-define i32 @test2(i8* %str) {
-  %temp1 = call i32 @strcmp(i8* %str, i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0))
-  ret i32 %temp1
-  ; CHECK: @test2
-  ; CHECK: %strcmpload = load i8* %str
-  ; CHECK: %temp1 = zext i8 %strcmpload to i32
-  ; CHECK: ret i32 %temp1
-}
-
-; strcmp(x, y)  -> cnst
-define i32 @test3() {
-  %temp1 = call i32 @strcmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0))
-  ret i32 %temp1
-  ; CHECK: @test3
-  ; CHECK: ret i32 -1
-}
-define i32 @test4() {
-  %temp1 = call i32 @strcmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0))
-  ret i32 %temp1
-  ; CHECK: @test4
-  ; CHECK: ret i32 1
-}
-
-; strcmp(x, y)   -> memcmp(x, y, <known length>)
-; (This transform is rather difficult to trigger in a useful manner)
-define i32 @test5(i1 %b) {
-  %sel = select i1 %b, i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8]* @bell, i32 0, i32 0)
-  %temp1 = call i32 @strcmp(i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i8* %sel)
-  ret i32 %temp1
-  ; CHECK: @test5
-  ; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i8* %sel, i32 5)
-  ; CHECK: ret i32 %memcmp
-}
-
-; strcmp(x,x)  -> 0
-define i32 @test6(i8* %str) {
-  %temp1 = call i32 @strcmp(i8* %str, i8* %str)
-  ret i32 %temp1
-  ; CHECK: @test6
-  ; CHECK: ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrNCat.ll b/test/Transforms/SimplifyLibCalls/StrNCat.ll
deleted file mode 100644
index 073792b96a..0000000000
--- a/test/Transforms/SimplifyLibCalls/StrNCat.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; Test that the StrNCatOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   not grep "call.*strncat"
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN:   grep "puts.*%arg1"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [6 x i8] c"hello\00"		; <[6 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
-@null_hello = constant [7 x i8] c"\00hello\00"		; <[7 x i8]*> [#uses=1]
-
-declare i8* @strncat(i8*, i8*, i32)
-
-declare i32 @puts(i8*)
-
-define i32 @main() {
-	%target = alloca [1024 x i8]		; <[1024 x i8]*> [#uses=1]
-	%arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0		; <i8*> [#uses=2]
-	store i8 0, i8* %arg1
-	%arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0		; <i8*> [#uses=1]
-	%rslt1 = call i8* @strncat( i8* %arg1, i8* %arg2, i32 6 )		; <i8*> [#uses=1]
-	%arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0		; <i8*> [#uses=1]
-	%rslt2 = call i8* @strncat( i8* %rslt1, i8* %arg3, i32 42 )		; <i8*> [#uses=1]
-	%arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0		; <i8*> [#uses=1]
-	%rslt3 = call i8* @strncat( i8* %rslt2, i8* %arg4, i32 42 )		; <i8*> [#uses=1]
-	call i32 @puts( i8* %rslt3 )		; <i32>:1 [#uses=0]
-	ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrNCmp.ll b/test/Transforms/SimplifyLibCalls/StrNCmp.ll
deleted file mode 100644
index 0b2a501a3c..0000000000
--- a/test/Transforms/SimplifyLibCalls/StrNCmp.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; Test that the StrCmpOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-
-@hello = constant [6 x i8] c"hello\00"		; <[6 x i8]*> [#uses=1]
-@hell = constant [5 x i8] c"hell\00"		; <[5 x i8]*> [#uses=1]
-@bell = constant [5 x i8] c"bell\00"		; <[5 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer		; <[1 x i8]*> [#uses=1]
-
-declare i32 @strncmp(i8*, i8*, i32)
-
-; strcmp("", x) -> -*x
-define i32 @test1(i8* %str) {
-  %temp1 = call i32 @strncmp(i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0), i8* %str, i32 10)
-  ret i32 %temp1
-  ; CHECK: @test1
-  ; CHECK: %strcmpload = load i8* %str
-  ; CHECK: %1 = zext i8 %strcmpload to i32
-  ; CHECK: %temp1 = sub i32 0, %1
-  ; CHECK: ret i32 %temp1
-}
-
-; strcmp(x, "") -> *x
-define i32 @test2(i8* %str) {
-  %temp1 = call i32 @strncmp(i8* %str, i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0), i32 10)
-  ret i32 %temp1
-  ; CHECK: @test2
-  ; CHECK: %strcmpload = load i8* %str
-  ; CHECK: %temp1 = zext i8 %strcmpload to i32
-  ; CHECK: ret i32 %temp1
-}
-
-; strncmp(x, y, n)  -> cnst
-define i32 @test3() {
-  %temp1 = call i32 @strncmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i32 10)
-  ret i32 %temp1
-  ; CHECK: @test3
-  ; CHECK: ret i32 -1
-}
-define i32 @test4() {
-  %temp1 = call i32 @strncmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0), i32 10)
-  ret i32 %temp1
-  ; CHECK: @test4
-  ; CHECK: ret i32 1
-}
-define i32 @test5() {
-  %temp1 = call i32 @strncmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i32 4)
-  ret i32 %temp1
-  ; CHECK: @test5
-  ; CHECK: ret i32 0
-}
-
-; strncmp(x,y,1) -> memcmp(x,y,1)
-define i32 @test6(i8* %str1, i8* %str2) {
-  %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 1)
-  ret i32 %temp1
-  ; CHECK: @test6
-  ; CHECK: load i8*
-  ; CHECK: load i8*
-  ; CHECK: sub i32
-}
-
-; strncmp(x,y,0)   -> 0
-define i32 @test7(i8* %str1, i8* %str2) {
-  %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 0)
-  ret i32 %temp1
-  ; CHECK: @test7
-  ; CHECK: ret i32 0
-}
-
-; strncmp(x,x,n)  -> 0
-define i32 @test8(i8* %str, i32 %n) {
-  %temp1 = call i32 @strncmp(i8* %str, i8* %str, i32 %n)
-  ret i32 %temp1
-  ; CHECK: @test8
-  ; CHECK: ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrRChr.ll b/test/Transforms/SimplifyLibCalls/StrRChr.ll
deleted file mode 100644
index 2259fc0289..0000000000
--- a/test/Transforms/SimplifyLibCalls/StrRChr.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; Test that the StrRChrOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "-p:64:64:64"
-
-@hello = constant [14 x i8] c"hello world\5Cn\00"
-@null = constant [1 x i8] zeroinitializer
-
-declare i8* @strrchr(i8*, i32)
-
-define void @foo(i8* %bar) {
-	%hello_p = getelementptr [14 x i8]* @hello, i32 0, i32 0
-	%null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
-	%world = call i8* @strrchr(i8* %hello_p, i32 119)
-; CHECK: getelementptr i8* %hello_p, i64 6
-	%ignore = call i8* @strrchr(i8* %null_p, i32 119)
-; CHECK-NOT: call i8* strrchr
-	%null = call i8* @strrchr(i8* %hello_p, i32 0)
-; CHECK: getelementptr i8* %hello_p, i64 13
-	%strchr = call i8* @strrchr(i8* %bar, i32 0)
-; CHECK: call i8* @strchr(i8* %bar, i32 0)
-	ret void
-}
diff --git a/test/Transforms/SimplifyLibCalls/weak-symbols.ll b/test/Transforms/SimplifyLibCalls/weak-symbols.ll
deleted file mode 100644
index 5875b211f7..0000000000
--- a/test/Transforms/SimplifyLibCalls/weak-symbols.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-; PR4738
-
-; SimplifyLibcalls shouldn't assume anything about weak symbols.
-
-@real_init = weak_odr constant [2 x i8] c"y\00"
-@fake_init = weak constant [2 x i8] c"y\00"
-@.str = private constant [2 x i8] c"y\00"
-
-; CHECK: define i32 @foo
-; CHECK: call i32 @strcmp
-define i32 @foo() nounwind {
-entry:
-  %t0 = call i32 @strcmp(i8* getelementptr inbounds ([2 x i8]* @fake_init, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0)) nounwind readonly
-  ret i32 %t0
-}
-
-; CHECK: define i32 @bar
-; CHECK: ret i32 0
-define i32 @bar() nounwind {
-entry:
-  %t0 = call i32 @strcmp(i8* getelementptr inbounds ([2 x i8]* @real_init, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0)) nounwind readonly
-  ret i32 %t0
-}
-
-declare i32 @strcmp(i8*, i8*) nounwind readonly
diff --git a/tools/bugpoint-passes/bugpoint.exports b/tools/bugpoint-passes/bugpoint.exports
index e69de29bb2..d8fdd6a576 100644
--- a/tools/bugpoint-passes/bugpoint.exports
+++ b/tools/bugpoint-passes/bugpoint.exports
@@ -0,0 +1 @@
+_ZN4llvm14BasicBlockPass14doFinalizationERNS_6ModuleE
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 57a31f21b8..0ee72387b8 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -171,6 +171,23 @@ namespace {
     cl::init(false));
 
   cl::opt<bool>
+  GenerateSoftFloatCalls("soft-float",
+    cl::desc("Generate software floating point library calls"),
+    cl::init(false));
+
+  cl::opt<llvm::FloatABI::ABIType>
+  FloatABIForCalls("float-abi",
+                   cl::desc("Choose float ABI type"),
+                   cl::init(FloatABI::Default),
+                   cl::values(
+                     clEnumValN(FloatABI::Default, "default",
+                                "Target default float ABI type"),
+                     clEnumValN(FloatABI::Soft, "soft",
+                                "Soft float ABI (implied by -soft-float)"),
+                     clEnumValN(FloatABI::Hard, "hard",
+                                "Hard float ABI (uses FP registers)"),
+                     clEnumValEnd));
+  cl::opt<bool>
 // In debug builds, make this default to true.
 #ifdef NDEBUG
 #define EMIT_DEBUG false
@@ -555,15 +572,22 @@ int main(int argc, char **argv, char * const *envp) {
   }
   builder.setOptLevel(OLvl);
 
+  TargetOptions Options;
+  Options.UseSoftFloat = GenerateSoftFloatCalls;
+  if (FloatABIForCalls != FloatABI::Default)
+    Options.FloatABIType = FloatABIForCalls;
+  if (GenerateSoftFloatCalls)
+    FloatABIForCalls = FloatABI::Soft;
+
   // Remote target execution doesn't handle EH or debug registration.
   if (!RemoteMCJIT) {
-    TargetOptions Options;
     Options.JITExceptionHandling = EnableJITExceptionHandling;
     Options.JITEmitDebugInfo = EmitJitDebugInfo;
     Options.JITEmitDebugInfoToDisk = EmitJitDebugInfoToDisk;
-    builder.setTargetOptions(Options);
   }
 
+  builder.setTargetOptions(Options);
+
   EE = builder.create();
   if (!EE) {
     if (!ErrorMsg.empty())
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index effc9c4365..f417f5f4fd 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -366,12 +366,13 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg) {
   if (_target != NULL)
     return false;
 
-  std::string Triple = _linker.getModule()->getTargetTriple();
-  if (Triple.empty())
-    Triple = sys::getDefaultTargetTriple();
+  std::string TripleStr = _linker.getModule()->getTargetTriple();
+  if (TripleStr.empty())
+    TripleStr = sys::getDefaultTargetTriple();
+  llvm::Triple Triple(TripleStr);
 
   // create target machine from info for merged modules
-  const Target *march = TargetRegistry::lookupTarget(Triple, errMsg);
+  const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
   if (march == NULL)
     return true;
 
@@ -392,11 +393,18 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg) {
 
   // construct LTOModule, hand over ownership of module and target
   SubtargetFeatures Features;
-  Features.getDefaultSubtargetFeatures(llvm::Triple(Triple));
+  Features.getDefaultSubtargetFeatures(Triple);
   std::string FeatureStr = Features.getString();
+  // Set a default CPU for Darwin triples.
+  if (_mCpu.empty() && Triple.isOSDarwin()) {
+    if (Triple.getArch() == llvm::Triple::x86_64)
+      _mCpu = "core2";
+    else if (Triple.getArch() == llvm::Triple::x86)
+      _mCpu = "yonah";
+  }
   TargetOptions Options;
   LTOModule::getTargetOptions(Options);
-  _target = march->createTargetMachine(Triple, _mCpu, FeatureStr, Options,
+  _target = march->createTargetMachine(TripleStr, _mCpu, FeatureStr, Options,
                                        RelocModel, CodeModel::Default,
                                        CodeGenOpt::Aggressive);
   return false;
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index c3c6692235..cb8a4e5f0d 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -281,23 +281,31 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
     return NULL;
   }
 
-  std::string Triple = m->getTargetTriple();
-  if (Triple.empty())
-    Triple = sys::getDefaultTargetTriple();
+  std::string TripleStr = m->getTargetTriple();
+  if (TripleStr.empty())
+    TripleStr = sys::getDefaultTargetTriple();
+  llvm::Triple Triple(TripleStr);
 
   // find machine architecture for this module
-  const Target *march = TargetRegistry::lookupTarget(Triple, errMsg);
+  const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
   if (!march)
     return NULL;
 
   // construct LTOModule, hand over ownership of module and target
   SubtargetFeatures Features;
-  Features.getDefaultSubtargetFeatures(llvm::Triple(Triple));
+  Features.getDefaultSubtargetFeatures(Triple);
   std::string FeatureStr = Features.getString();
+  // Set a default CPU for Darwin triples.
   std::string CPU;
+  if (Triple.isOSDarwin()) {
+    if (Triple.getArch() == llvm::Triple::x86_64)
+      CPU = "core2";
+    else if (Triple.getArch() == llvm::Triple::x86)
+      CPU = "yonah";
+  }
   TargetOptions Options;
   getTargetOptions(Options);
-  TargetMachine *target = march->createTargetMachine(Triple, CPU, FeatureStr,
+  TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr,
                                                      Options);
 
   // @LOCALMOD-BEGIN
diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt
index cb9a9092b5..94f7fda2a9 100644
--- a/unittests/ADT/CMakeLists.txt
+++ b/unittests/ADT/CMakeLists.txt
@@ -13,6 +13,7 @@ set(ADTSources
   FoldingSet.cpp
   HashingTest.cpp
   ilistTest.cpp
+  ImmutableMapTest.cpp
   ImmutableSetTest.cpp
   IntEqClassesTest.cpp
   IntervalMapTest.cpp
diff --git a/unittests/ADT/ImmutableMapTest.cpp b/unittests/ADT/ImmutableMapTest.cpp
new file mode 100644
index 0000000000..774581ca4e
--- /dev/null
+++ b/unittests/ADT/ImmutableMapTest.cpp
@@ -0,0 +1,50 @@
+//===----------- ImmutableMapTest.cpp - ImmutableMap unit tests ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/ImmutableMap.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(ImmutableMapTest, EmptyIntMapTest) {
+  ImmutableMap<int, int>::Factory f;
+
+  EXPECT_TRUE(f.getEmptyMap() == f.getEmptyMap());
+  EXPECT_FALSE(f.getEmptyMap() != f.getEmptyMap());
+  EXPECT_TRUE(f.getEmptyMap().isEmpty());
+
+  ImmutableMap<int, int> S = f.getEmptyMap();
+  EXPECT_EQ(0u, S.getHeight());
+  EXPECT_TRUE(S.begin() == S.end());
+  EXPECT_FALSE(S.begin() != S.end());
+}
+
+TEST(ImmutableMapTest, MultiElemIntMapTest) {
+  ImmutableMap<int, int>::Factory f;
+  ImmutableMap<int, int> S = f.getEmptyMap();
+
+  ImmutableMap<int, int> S2 = f.add(f.add(f.add(S, 3, 10), 4, 11), 5, 12);
+
+  EXPECT_TRUE(S.isEmpty());
+  EXPECT_FALSE(S2.isEmpty());
+
+  EXPECT_EQ(0, S.lookup(3));
+  EXPECT_EQ(0, S.lookup(9));
+
+  EXPECT_EQ(10, *S2.lookup(3));
+  EXPECT_EQ(11, *S2.lookup(4));
+  EXPECT_EQ(12, *S2.lookup(5));
+
+  EXPECT_EQ(5, S2.getMaxElement()->first);
+  EXPECT_EQ(3U, S2.getHeight());
+}
+
+}
diff --git a/unittests/ADT/TripleTest.cpp b/unittests/ADT/TripleTest.cpp
index 967437ca05..7c3ab97389 100644
--- a/unittests/ADT/TripleTest.cpp
+++ b/unittests/ADT/TripleTest.cpp
@@ -105,6 +105,18 @@ TEST(TripleTest, ParsedIDs) {
   EXPECT_EQ(Triple::Linux, T.getOS());
   EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
 
+  T = Triple("powerpc-ibm-aix");
+  EXPECT_EQ(Triple::ppc, T.getArch());
+  EXPECT_EQ(Triple::IBM, T.getVendor());
+  EXPECT_EQ(Triple::AIX, T.getOS());
+  EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
+  T = Triple("powerpc64-ibm-aix");
+  EXPECT_EQ(Triple::ppc64, T.getArch());
+  EXPECT_EQ(Triple::IBM, T.getVendor());
+  EXPECT_EQ(Triple::AIX, T.getOS());
+  EXPECT_EQ(Triple::UnknownEnvironment, T.getEnvironment());
+
   T = Triple("powerpc-dunno-notsure");
   EXPECT_EQ(Triple::ppc, T.getArch());
   EXPECT_EQ(Triple::UnknownVendor, T.getVendor());
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 5e2af030f2..ae6855e68b 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -633,6 +633,7 @@ TEST_F(JITTest, AvailableExternallyGlobalIsntEmitted) {
 // This function is intentionally defined differently in the statically-compiled
 // program from the IR input to the JIT to assert that the JIT doesn't use its
 // definition.
+extern "C" int32_t JITTest_AvailableExternallyFunction() LLVM_ATTRIBUTE_USED;
 extern "C" int32_t JITTest_AvailableExternallyFunction() {
   return 42;
 }
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index 38ddb8021b..e76fa57066 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -1714,9 +1714,7 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
   OpOS << "void " << Target.getName() << ClassName << "::\n"
        << "convertToMapAndConstraints(unsigned Kind,\n";
   OpOS.indent(27);
-  OpOS << "const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
-  OpOS.indent(27);
-  OpOS << "MatchInstMapAndConstraintsImpl &MapAndConstraints) {\n"
+  OpOS << "const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {\n"
        << "  assert(Kind < CVT_NUM_SIGNATURES && \"Invalid signature!\");\n"
        << "  unsigned NumMCOperands = 0;\n"
        << "  const uint8_t *Converter = ConversionTable[Kind];\n"
@@ -1724,9 +1722,11 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
        << "    switch (*p) {\n"
        << "    default: llvm_unreachable(\"invalid conversion entry!\");\n"
        << "    case CVT_Reg:\n"
+       << "      Operands[*(p + 1)]->setMCOperandNum(NumMCOperands);\n"
+       << "      Operands[*(p + 1)]->setConstraint(\"m\");\n"
+       << "      ++NumMCOperands;\n"
+       << "      break;\n"
        << "    case CVT_Tied:\n"
-       << "      MapAndConstraints.push_back(std::make_pair(NumMCOperands,"
-       << "\"m\"));\n"
        << "      ++NumMCOperands;\n"
        << "      break;\n";
 
@@ -1823,8 +1823,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
 
         // Add a handler for the operand number lookup.
         OpOS << "    case " << Name << ":\n"
-             << "      MapAndConstraints.push_back(std::make_pair(NumMCOperands"
-             << ",\"m\"));\n"
+             << "      Operands[*(p + 1)]->setMCOperandNum(NumMCOperands);\n"
+             << "      Operands[*(p + 1)]->setConstraint(\"m\");\n"
              << "      NumMCOperands += " << OpInfo.MINumOperands << ";\n"
              << "      break;\n";
         break;
@@ -1862,8 +1862,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
               << "      break;\n";
 
         OpOS << "    case " << Name << ":\n"
-             << "      MapAndConstraints.push_back(std::make_pair(NumMCOperands"
-             << ",\"\"));\n"
+             << "      Operands[*(p + 1)]->setMCOperandNum(NumMCOperands);\n"
+             << "      Operands[*(p + 1)]->setConstraint(\"\");\n"
              << "      ++NumMCOperands;\n"
              << "      break;\n";
         break;
@@ -1893,8 +1893,8 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
               << "      break;\n";
 
         OpOS << "    case " << Name << ":\n"
-             << "      MapAndConstraints.push_back(std::make_pair(NumMCOperands"
-             << ",\"m\"));\n"
+             << "      Operands[*(p + 1)]->setMCOperandNum(NumMCOperands);\n"
+             << "      Operands[*(p + 1)]->setConstraint(\"m\");\n"
              << "      ++NumMCOperands;\n"
              << "      break;\n";
       }
@@ -2604,16 +2604,12 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
      << "                       const SmallVectorImpl<MCParsedAsmOperand*> "
      << "&Operands);\n";
   OS << "  void convertToMapAndConstraints(unsigned Kind,\n                ";
-  OS << "           const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n";
-  OS.indent(29);
-  OS << "MatchInstMapAndConstraintsImpl &MapAndConstraints);\n";
+  OS << "           const SmallVectorImpl<MCParsedAsmOperand*> &Operands);\n";
   OS << "  bool mnemonicIsValid(StringRef Mnemonic);\n";
   OS << "  unsigned MatchInstructionImpl(\n";
   OS.indent(27);
   OS << "const SmallVectorImpl<MCParsedAsmOperand*> &Operands,\n"
-     << "                                unsigned &Kind, MCInst &Inst,\n";
-  OS.indent(30);
-  OS << "MatchInstMapAndConstraintsImpl &MapAndConstraints,\n"
+     << "                                MCInst &Inst,\n"
      << "                                unsigned &ErrorInfo,"
      << " bool matchingInlineAsm,\n"
      << "                                unsigned VariantID = 0);\n";
@@ -2806,8 +2802,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
      << Target.getName() << ClassName << "::\n"
      << "MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>"
      << " &Operands,\n";
-  OS << "                     unsigned &Kind, MCInst &Inst,\n"
-     << "SmallVectorImpl<std::pair< unsigned, std::string > > &MapAndConstraints,\n"
+  OS << "                     MCInst &Inst,\n"
      << "unsigned &ErrorInfo, bool matchingInlineAsm, unsigned VariantID) {\n";
 
   OS << "  // Eliminate obvious mismatches.\n";
@@ -2903,10 +2898,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
   OS << "    }\n";
   OS << "\n";
   OS << "    if (matchingInlineAsm) {\n";
-  OS << "      Kind = it->ConvertFn;\n";
   OS << "      Inst.setOpcode(it->Opcode);\n";
-  OS << "      convertToMapAndConstraints(it->ConvertFn, Operands, "
-     << "MapAndConstraints);\n";
+  OS << "      convertToMapAndConstraints(it->ConvertFn, Operands);\n";
   OS << "      return Match_Success;\n";
   OS << "    }\n\n";
   OS << "    // We have selected a definite instruction, convert the parsed\n"
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 4e97cf4693..e447c16b16 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -299,7 +299,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
                                   const OperandInfoMapTy &OpInfo,
                                   raw_ostream &OS) {
   int MinOperands = 0;
-  if (!Inst.Operands.size() == 0)
+  if (!Inst.Operands.empty())
     // Each logical operand can be multiple MI operands.
     MinOperands = Inst.Operands.back().MIOperandNo +
                   Inst.Operands.back().MINumOperands;
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 080e711d55..e830a66a33 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -510,10 +510,10 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
   OS << "// Add parameter attributes that are not common to all intrinsics.\n";
   OS << "#ifdef GET_INTRINSIC_ATTRIBUTES\n";
   if (TargetOnly)
-    OS << "static AttrListPtr getAttributes(" << TargetPrefix 
+    OS << "static AttrListPtr getAttributes(LLVMContext &C, " << TargetPrefix
        << "Intrinsic::ID id) {\n";
   else
-    OS << "AttrListPtr Intrinsic::getAttributes(ID id) {\n";
+    OS << "AttrListPtr Intrinsic::getAttributes(LLVMContext &C, ID id) {\n";
 
   // Compute the maximum number of attribute arguments and the map
   typedef std::map<const CodeGenIntrinsic*, unsigned,
@@ -582,7 +582,7 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
           ++ai;
         } while (ai != ae && intrinsic.ArgumentAttributes[ai].first == argNo);
 
-        OS << "      AWI[" << numAttrs++ << "] = AttributeWithIndex::get("
+        OS << "      AWI[" << numAttrs++ << "] = AttributeWithIndex::get(C, "
            << argNo+1 << ", AttrVec);\n";
       }
     }
@@ -606,8 +606,8 @@ EmitAttributes(const std::vector<CodeGenIntrinsic> &Ints, raw_ostream &OS) {
         OS << "      AttrVec.push_back(Attributes::ReadNone);\n"; 
         break;
       }
-      OS << "      AWI[" << numAttrs++ << "] = AttributeWithIndex::get(~0, "
-         << "AttrVec);\n";
+      OS << "      AWI[" << numAttrs++ << "] = AttributeWithIndex::get(C, "
+         << "AttrListPtr::FunctionIndex, AttrVec);\n";
     }
 
     if (numAttrs) {