Diffstat (limited to 'lib/Transforms')
19 files changed, 582 insertions, 252 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index b94dd69deb..10f5b6e658 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -592,14 +592,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Type *RetTy = FTy->getReturnType(); - // Work around LLVM bug PR56: the CWriter cannot emit varargs functions which - // have zero fixed arguments. - bool ExtraArgHack = false; - if (Params.empty() && FTy->isVarArg()) { - ExtraArgHack = true; - Params.push_back(Type::getInt32Ty(F->getContext())); - } - // Construct the new function type using the new arguments. FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg()); @@ -711,9 +703,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } } - if (ExtraArgHack) - Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext()))); - // Push any varargs arguments on the list. for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { Args.push_back(*AI); @@ -870,16 +859,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } // Increment I2 past all of the arguments added for this promoted pointer. - for (unsigned i = 0, e = ArgIndices.size(); i != e; ++i) - ++I2; + std::advance(I2, ArgIndices.size()); } - // Notify the alias analysis implementation that we inserted a new argument. - if (ExtraArgHack) - AA.copyValue(Constant::getNullValue(Type::getInt32Ty(F->getContext())), - NF->arg_begin()); - - // Tell the alias analysis that the old function is about to disappear. AA.replaceWithNewValue(F, NF); diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index fd23a935b9..c7429c5954 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -717,9 +717,9 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // here. Currently, this should not be possible, but special handling might be // required when new return value attributes are added. if (NRetTy->isVoidTy()) - RAttrs &= ~Attribute::typeIncompatible(NRetTy); + RAttrs &= ~Attributes::typeIncompatible(NRetTy); else - assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0 + assert((RAttrs & Attributes::typeIncompatible(NRetTy)) == 0 && "Return attributes no longer compatible?"); if (RAttrs) @@ -786,7 +786,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Attributes RAttrs = CallPAL.getRetAttributes(); Attributes FnAttrs = CallPAL.getFnAttributes(); // Adjust in case the function was changed to return void. - RAttrs &= ~Attribute::typeIncompatible(NF->getReturnType()); + RAttrs &= ~Attributes::typeIncompatible(NF->getReturnType()); if (RAttrs) AttributesVec.push_back(AttributeWithIndex::get(0, RAttrs)); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index b888e95982..b1ba6be5ff 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -962,7 +962,9 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, // If we get here we could have other crazy uses that are transitively // loaded. 
assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) || - isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser)) && + isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) || + isa<BitCastInst>(GlobalUser) || + isa<GetElementPtrInst>(GlobalUser)) && "Only expect load and stores!"); } } diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index 664ddf6f7a..42f0991360 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -65,7 +65,7 @@ Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) { /// \brief Minimal filter to detect invalid constructs for inlining. static bool isInlineViable(Function &F) { - bool ReturnsTwice = F.hasFnAttr(Attribute::ReturnsTwice); + bool ReturnsTwice = F.getFnAttributes().hasReturnsTwiceAttr(); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain an indirect branch. if (isa<IndirectBrInst>(BI->getTerminator())) @@ -114,7 +114,7 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) { if (Callee->isDeclaration()) return InlineCost::getNever(); // Return never for anything not marked as always inline. - if (!Callee->hasFnAttr(Attribute::AlwaysInline)) + if (!Callee->getFnAttributes().hasAlwaysInlineAttr()) return InlineCost::getNever(); // Do some minimal analysis to preclude non-viable functions. diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index a9263baa44..7932b40bdc 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -93,10 +93,10 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, // If the inlined function had a higher stack protection level than the // calling function, then bump up the caller's stack protection level. - if (Callee->hasFnAttr(Attribute::StackProtectReq)) + if (Callee->getFnAttributes().hasStackProtectReqAttr()) Caller->addFnAttr(Attribute::StackProtectReq); - else if (Callee->hasFnAttr(Attribute::StackProtect) && - !Caller->hasFnAttr(Attribute::StackProtectReq)) + else if (Callee->getFnAttributes().hasStackProtectAttr() && + !Caller->getFnAttributes().hasStackProtectReqAttr()) Caller->addFnAttr(Attribute::StackProtect); // Look at all of the allocas that we inlined through this call site. If we @@ -209,7 +209,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { // would decrease the threshold. Function *Caller = CS.getCaller(); bool OptSize = Caller && !Caller->isDeclaration() && - Caller->hasFnAttr(Attribute::OptimizeForSize); + Caller->getFnAttributes().hasOptimizeForSizeAttr(); if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && OptSizeThreshold < thres) thres = OptSizeThreshold; @@ -217,7 +217,7 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const { // Listen to the inlinehint attribute when it would increase the threshold. Function *Callee = CS.getCalledFunction(); bool InlineHint = Callee && !Callee->isDeclaration() && - Callee->hasFnAttr(Attribute::InlineHint); + Callee->getFnAttributes().hasInlineHintAttr(); if (InlineHint && HintThreshold > thres) thres = HintThreshold; @@ -533,7 +533,7 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { // Handle the case when this function is called and we only want to care // about always-inline functions. This is a bit of a hack to share code // between here and the InlineAlways pass. 
- if (AlwaysInlineOnly && !F->hasFnAttr(Attribute::AlwaysInline)) + if (AlwaysInlineOnly && !F->getFnAttributes().hasAlwaysInlineAttr()) continue; // If the only remaining users of the function are dead constants, remove diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index c81b333813..9e328b9ac9 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -211,13 +211,12 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // FIXME: We shouldn't bother with this anymore. MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes - // GlobalOpt already deletes dead functions and globals, at -O3 try a + // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. - if (OptLevel > 2) + if (OptLevel > 1) { MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. - - if (OptLevel > 1) MPM.add(createConstantMergePass()); // Merge dup global constants + } } addExtensionsToPM(EP_OptimizerLast, MPM); } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 23c08699ff..ac30dcdcbf 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1037,7 +1037,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (!CallerPAL.isEmpty() && !Caller->use_empty()) { Attributes RAttrs = CallerPAL.getRetAttributes(); - if (RAttrs & Attribute::typeIncompatible(NewRetTy)) + if (RAttrs & Attributes::typeIncompatible(NewRetTy)) return false; // Attribute not compatible with transformed value. } @@ -1067,7 +1067,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; // Cannot transform this parameter value. Attributes Attrs = CallerPAL.getParamAttributes(i + 1); - if (Attrs & Attribute::typeIncompatible(ParamTy)) + if (Attrs & Attributes::typeIncompatible(ParamTy)) return false; // Attribute not compatible with transformed value. // If the parameter is passed as a byval argument, then we have to have a @@ -1141,7 +1141,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If the return value is not being used, the type may not be compatible // with the existing attributes. Wipe out any problematic attributes. - RAttrs &= ~Attribute::typeIncompatible(NewRetTy); + RAttrs &= ~Attributes::typeIncompatible(NewRetTy); // Add the new return attributes. if (RAttrs) diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 5b6cf4a4a8..a446e427e5 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -264,26 +264,28 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { } } - // Check to see if this allocation is only modified by a memcpy/memmove from - // a constant global whose alignment is equal to or exceeds that of the - // allocation. If this is the case, we can change all users to use - // the constant global instead. This is commonly produced by the CFE by - // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' - // is only subsequently read. 
- SmallVector<Instruction *, 4> ToDelete; - if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { - if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { - DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); - DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); - for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) - EraseInstFromFunction(*ToDelete[i]); - Constant *TheSrc = cast<Constant>(Copy->getSource()); - Instruction *NewI - = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, - AI.getType())); - EraseInstFromFunction(*Copy); - ++NumGlobalCopies; - return NewI; + if (TD) { + // Check to see if this allocation is only modified by a memcpy/memmove from + // a constant global whose alignment is equal to or exceeds that of the + // allocation. If this is the case, we can change all users to use + // the constant global instead. This is commonly produced by the CFE by + // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' + // is only subsequently read. + SmallVector<Instruction *, 4> ToDelete; + if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { + if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) { + DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); + DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); + for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) + EraseInstFromFunction(*ToDelete[i]); + Constant *TheSrc = cast<Constant>(Copy->getSource()); + Instruction *NewI + = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc, + AI.getType())); + EraseInstFromFunction(*Copy); + ++NumGlobalCopies; + return NewI; + } } } diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 291e80019e..0ba7340e64 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -903,7 +903,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return &SI; } - if (VectorType* VecTy = dyn_cast<VectorType>(SI.getType())) { + if (VectorType *VecTy = dyn_cast<VectorType>(SI.getType())) { unsigned VWidth = VecTy->getNumElements(); APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); @@ -912,6 +912,28 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { return ReplaceInstUsesWith(SI, V); return &SI; } + + if (ConstantVector *CV = dyn_cast<ConstantVector>(CondVal)) { + // Form a shufflevector instruction. + SmallVector<Constant *, 8> Mask(VWidth); + Type *Int32Ty = Type::getInt32Ty(CV->getContext()); + for (unsigned i = 0; i != VWidth; ++i) { + Constant *Elem = cast<Constant>(CV->getOperand(i)); + if (ConstantInt *E = dyn_cast<ConstantInt>(Elem)) + Mask[i] = ConstantInt::get(Int32Ty, i + (E->isZero() ? 
VWidth : 0)); + else if (isa<UndefValue>(Elem)) + Mask[i] = UndefValue::get(Int32Ty); + else + return 0; + } + Constant *MaskVal = ConstantVector::get(Mask); + Value *V = Builder->CreateShuffleVector(TrueVal, FalseVal, MaskVal); + return ReplaceInstUsesWith(SI, V); + } + + if (isa<ConstantAggregateZero>(CondVal)) { + return ReplaceInstUsesWith(SI, FalseVal); + } } return 0; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index afa6a4b5e6..1b102bd243 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -854,7 +854,7 @@ bool AddressSanitizer::handleFunction(Module &M, Function &F) { // If needed, insert __asan_init before checking for AddressSafety attr. maybeInsertAsanInitAtFunctionEntry(F); - if (!F.hasFnAttr(Attribute::AddressSafety)) return false; + if (!F.getFnAttributes().hasAddressSafetyAttr()) return false; if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false; diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 495cdc6321..305d70f27b 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -149,7 +149,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLInfo = &getAnalysis<TargetLibraryInfo>(); DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); - OptSize = F.hasFnAttr(Attribute::OptimizeForSize); + OptSize = F.getFnAttributes().hasOptimizeForSizeAttr(); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. @@ -226,7 +226,8 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { // edge, just collapse it. BasicBlock *SinglePred = BB->getSinglePredecessor(); - if (!SinglePred || SinglePred == BB) continue; + // Don't merge if BB's address is taken. + if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); if (Term && !Term->isConditional()) { @@ -788,7 +789,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { } // If we eliminated all predecessors of the block, delete the block now. - if (Changed && pred_begin(BB) == pred_end(BB)) + if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) BB->eraseFromParent(); return Changed; diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 9b0aadb0b5..3ec6f3dcc3 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -235,6 +235,11 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) { // This case never fires - remove it. CI.getCaseSuccessor()->removePredecessor(BB); SI->removeCase(CI); // Does not invalidate the iterator. + + // The condition can be modified by removePredecessor's PHI simplification + // logic. 
+ Cond = SI->getCondition(); + ++NumDeadCases; Changed = true; } else if (State == LazyValueInfo::True) { diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 1ff4329c84..301ee2f663 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/Debug.h" #include "llvm/ADT/SetVector.h" @@ -45,6 +46,7 @@ namespace { AliasAnalysis *AA; MemoryDependenceAnalysis *MD; DominatorTree *DT; + const TargetLibraryInfo *TLI; static char ID; // Pass identification, replacement for typeid DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) { @@ -55,6 +57,7 @@ namespace { AA = &getAnalysis<AliasAnalysis>(); MD = &getAnalysis<MemoryDependenceAnalysis>(); DT = &getAnalysis<DominatorTree>(); + TLI = AA->getTargetLibraryInfo(); bool Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) @@ -144,7 +147,7 @@ static void DeleteDeadInstruction(Instruction *I, /// hasMemoryWrite - Does this instruction write some memory? This only returns /// true for things that we can analyze with other helpers below. -static bool hasMemoryWrite(Instruction *I) { +static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { if (isa<StoreInst>(I)) return true; if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { @@ -159,6 +162,26 @@ static bool hasMemoryWrite(Instruction *I) { return true; } } + if (CallSite CS = I) { + if (Function *F = CS.getCalledFunction()) { + if (TLI && TLI->has(LibFunc::strcpy) && + F->getName() == TLI->getName(LibFunc::strcpy)) { + return true; + } + if (TLI && TLI->has(LibFunc::strncpy) && + F->getName() == TLI->getName(LibFunc::strncpy)) { + return true; + } + if (TLI && TLI->has(LibFunc::strcat) && + F->getName() == TLI->getName(LibFunc::strcat)) { + return true; + } + if (TLI && TLI->has(LibFunc::strncat) && + F->getName() == TLI->getName(LibFunc::strncat)) { + return true; + } + } + } return false; } @@ -206,7 +229,8 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { /// instruction if any. static AliasAnalysis::Location getLocForRead(Instruction *Inst, AliasAnalysis &AA) { - assert(hasMemoryWrite(Inst) && "Unknown instruction case"); + assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) && + "Unknown instruction case"); // The only instructions that both read and write are the mem transfer // instructions (memcpy/memmove). @@ -223,23 +247,29 @@ static bool isRemovable(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->isUnordered(); - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate"); - case Intrinsic::lifetime_end: - // Never remove dead lifetime_end's, e.g. because it is followed by a - // free. - return false; - case Intrinsic::init_trampoline: - // Always safe to remove init_trampoline. - return true; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate"); + case Intrinsic::lifetime_end: + // Never remove dead lifetime_end's, e.g. because it is followed by a + // free. + return false; + case Intrinsic::init_trampoline: + // Always safe to remove init_trampoline. 
+ return true; - case Intrinsic::memset: - case Intrinsic::memmove: - case Intrinsic::memcpy: - // Don't remove volatile memory intrinsics. - return !cast<MemIntrinsic>(II)->isVolatile(); + case Intrinsic::memset: + case Intrinsic::memmove: + case Intrinsic::memcpy: + // Don't remove volatile memory intrinsics. + return !cast<MemIntrinsic>(II)->isVolatile(); + } } + + if (CallSite CS = I) + return CS.getInstruction()->use_empty(); + + return false; } @@ -250,14 +280,19 @@ static bool isShortenable(Instruction *I) { if (isa<StoreInst>(I)) return false; - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::memset: - case Intrinsic::memcpy: - // Do shorten memory intrinsics. - return true; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::memset: + case Intrinsic::memcpy: + // Do shorten memory intrinsics. + return true; + } } + + // Don't shorten libcalls calls for now. + + return false; } /// getStoredPointerOperand - Return the pointer that is being written to. @@ -267,12 +302,18 @@ static Value *getStoredPointerOperand(Instruction *I) { if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) return MI->getDest(); - IntrinsicInst *II = cast<IntrinsicInst>(I); - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Unexpected intrinsic!"); - case Intrinsic::init_trampoline: - return II->getArgOperand(0); + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::init_trampoline: + return II->getArgOperand(0); + } } + + CallSite CS = I; + // All the supported functions so far happen to have dest as their first + // argument. + return CS.getArgument(0); } static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) { @@ -455,13 +496,13 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { Instruction *Inst = BBI++; // Handle 'free' calls specially. - if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) { + if (CallInst *F = isFreeCall(Inst, TLI)) { MadeChange |= HandleFree(F); continue; } // If we find something that writes memory, get its memory dependence. - if (!hasMemoryWrite(Inst)) + if (!hasMemoryWrite(Inst, TLI)) continue; MemDepResult InstDep = MD->getDependency(Inst); @@ -484,7 +525,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // in case we need it. WeakVH NextInst(BBI); - DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo()); + DeleteDeadInstruction(SI, *MD, TLI); if (NextInst == 0) // Next instruction deleted. BBI = BB.begin(); @@ -531,7 +572,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { << *DepWrite << "\n KILLER: " << *Inst << '\n'); // Delete the store and now-dead instructions that feed it. 
- DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo()); + DeleteDeadInstruction(DepWrite, *MD, TLI); ++NumFastStores; MadeChange = true; @@ -628,7 +669,7 @@ bool DSE::HandleFree(CallInst *F) { MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB); while (Dep.isDef() || Dep.isClobber()) { Instruction *Dependency = Dep.getInst(); - if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency)) + if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency)) break; Value *DepPointer = @@ -641,7 +682,7 @@ bool DSE::HandleFree(CallInst *F) { Instruction *Next = llvm::next(BasicBlock::iterator(Dependency)); // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo()); + DeleteDeadInstruction(Dependency, *MD, TLI); ++NumFastStores; MadeChange = true; @@ -681,8 +722,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. - else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) && - !PointerMayBeCaptured(I, true, true)) + else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true)) DeadStackObjects.insert(I); } @@ -698,7 +738,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { --BBI; // If we find a store, check to see if it points into a dead stack value. - if (hasMemoryWrite(BBI) && isRemovable(BBI)) { + if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) { // See through pointer-to-pointer bitcasts SmallVector<Value *, 4> Pointers; GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers); @@ -726,8 +766,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { dbgs() << '\n'); // DCE instructions only used to calculate that store. - DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(), - &DeadStackObjects); + DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; @@ -735,10 +774,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } // Remove any dead non-memory-mutating instructions. - if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) { + if (isInstructionTriviallyDead(BBI, TLI)) { Instruction *Inst = BBI++; - DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(), - &DeadStackObjects); + DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; @@ -754,7 +792,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (CallSite CS = cast<Value>(BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. - if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo())) + if (isAllocLikeFn(BBI, TLI)) DeadStackObjects.remove(BBI); // If this call does not access memory, it can't be loading any of our diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 09a186f7f9..f8709a537f 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -145,7 +145,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // not user specified. 
unsigned Threshold = CurrentThreshold; if (!UserThreshold && - Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) + Header->getParent()->getFnAttributes().hasOptimizeForSizeAttr()) Threshold = OptSizeUnrollThreshold; // Find trip count and trip multiple if count is not available diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 58f7739888..74c8f43ec2 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -638,7 +638,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { // Check to see if it would be profitable to unswitch current loop. // Do not do non-trivial unswitch while optimizing for size. - if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize)) + if (OptimizeForSize || F->getFnAttributes().hasOptimizeForSizeAttr()) return false; UnswitchNontrivialCondition(LoopCond, Val, currentLoop); diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index e3182d319c..a8dc0533bf 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -202,11 +202,11 @@ public: use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); } use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); } use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); } - void use_insert(unsigned Idx, use_iterator UI, const PartitionUse &U) { - Uses[Idx].insert(UI, U); + void use_push_back(unsigned Idx, const PartitionUse &U) { + Uses[Idx].push_back(U); } - void use_insert(const_iterator I, use_iterator UI, const PartitionUse &U) { - Uses[I - begin()].insert(UI, U); + void use_push_back(const_iterator I, const PartitionUse &U) { + Uses[I - begin()].push_back(U); } void use_erase(unsigned Idx, use_iterator UI) { Uses[Idx].erase(UI); } void use_erase(const_iterator I, use_iterator UI) { @@ -522,8 +522,10 @@ private: void insertUse(Instruction &I, int64_t Offset, uint64_t Size, bool IsSplittable = false) { - // Completely skip uses which don't overlap the allocation. - if ((Offset >= 0 && (uint64_t)Offset >= AllocSize) || + // Completely skip uses which have a zero size or don't overlap the + // allocation. + if (Size == 0 || + (Offset >= 0 && (uint64_t)Offset >= AllocSize) || (Offset < 0 && (uint64_t)-Offset >= Size)) { DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset << " which starts past the end of the " << AllocSize @@ -660,11 +662,14 @@ private: bool Inserted = false; llvm::tie(PMI, Inserted) = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx)); - if (!Inserted && Offsets.IsSplittable) { + if (Offsets.IsSplittable && + (!Inserted || II.getRawSource() == II.getRawDest())) { // We've found a memory transfer intrinsic which refers to the alloca as - // both a source and dest. We refuse to split these to simplify splitting - // logic. If possible, SROA will still split them into separate allocas - // and then re-analyze. + // both a source and dest. This is detected either by direct equality of + // the operand values, or when we visit the intrinsic twice due to two + // different chains of values leading to it. We refuse to split these to + // simplify splitting logic. If possible, SROA will still split them into + // separate allocas and then re-analyze. 
Offsets.IsSplittable = false; P.Partitions[PMI->second].IsSplittable = false; P.Partitions[NewIdx].IsSplittable = false; @@ -697,6 +702,9 @@ private: SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses; Visited.insert(Root); Uses.push_back(std::make_pair(cast<Instruction>(*U), Root)); + // If there are no loads or stores, the access is dead. We mark that as + // a size zero access. + Size = 0; do { Instruction *I, *UsedI; llvm::tie(UsedI, I) = Uses.pop_back_val(); @@ -824,9 +832,9 @@ private: } void insertUse(Instruction &User, int64_t Offset, uint64_t Size) { - // If the use extends outside of the allocation, record it as a dead use - // for elimination later. - if ((uint64_t)Offset >= AllocSize || + // If the use has a zero size or extends outside of the allocation, record + // it as a dead use for elimination later. + if (Size == 0 || (uint64_t)Offset >= AllocSize || (Offset < 0 && (uint64_t)-Offset >= Size)) return markAsDead(User); @@ -853,7 +861,7 @@ private: PartitionUse NewUse(std::max(I->BeginOffset, BeginOffset), std::min(I->EndOffset, EndOffset), &User, cast<Instruction>(*U)); - P.Uses[I - P.begin()].push_back(NewUse); + P.use_push_back(I, NewUse); if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser())) P.PHIOrSelectOpMap[std::make_pair(&User, U->get())] = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1); @@ -1102,8 +1110,6 @@ AllocaPartitioning::AllocaPartitioning(const TargetData &TD, AllocaInst &AI) Uses.resize(Partitions.size()); UseBuilder UB(TD, AI, *this); UB(); - for (iterator I = Partitions.begin(), E = Partitions.end(); I != E; ++I) - std::stable_sort(use_begin(I), use_end(I)); } Type *AllocaPartitioning::getCommonType(iterator I) const { @@ -1890,7 +1896,8 @@ private: Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy, uint64_t Offset) { assert(IntPromotionTy && "Alloca is not an integer we can extract from"); - Value *V = IRB.CreateLoad(&NewAI, getName(".load")); + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t RelOffset = Offset - NewAllocaBeginOffset; if (RelOffset) @@ -1906,7 +1913,7 @@ private: StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) { IntegerType *Ty = cast<IntegerType>(V->getType()); if (Ty == IntPromotionTy) - return IRB.CreateStore(V, &NewAI); + return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() && "Cannot insert a larger integer!"); @@ -1918,10 +1925,12 @@ private: APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth()) .shl(RelOffset*8); - Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")), + Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI, + NewAI.getAlignment(), + getName(".oldload")), Mask, getName(".mask")); - return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")), - &NewAI); + return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")), + &NewAI, NewAI.getAlignment()); } void deleteIfTriviallyDead(Value *V) { @@ -1943,12 +1952,12 @@ private: Value *Result; if (LI.getType() == VecTy->getElementType() || BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { - Result - = IRB.CreateExtractElement(IRB.CreateLoad(&NewAI, getName(".load")), - getIndex(IRB, BeginOffset), - getName(".extract")); + Result = IRB.CreateExtractElement( + IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), + getIndex(IRB, BeginOffset), 
getName(".extract")); } else { - Result = IRB.CreateLoad(&NewAI, getName(".load")); + Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); } if (Result->getType() != LI.getType()) Result = getValueCast(IRB, Result, LI.getType()); @@ -1983,6 +1992,9 @@ private: Value *NewPtr = getAdjustedAllocaPtr(IRB, LI.getPointerOperand()->getType()); LI.setOperand(0, NewPtr); + if (LI.getAlignment()) + LI.setAlignment(MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset)); DEBUG(dbgs() << " to: " << LI << "\n"); deleteIfTriviallyDead(OldOp); @@ -1996,13 +2008,14 @@ private: BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { if (V->getType() != ElementTy) V = getValueCast(IRB, V, ElementTy); - V = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V, - getIndex(IRB, BeginOffset), + LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset), getName(".insert")); } else if (V->getType() != VecTy) { V = getValueCast(IRB, V, VecTy); } - StoreInst *Store = IRB.CreateStore(V, &NewAI); + StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.push_back(&SI); (void)Store; @@ -2033,6 +2046,9 @@ private: Value *NewPtr = getAdjustedAllocaPtr(IRB, SI.getPointerOperand()->getType()); SI.setOperand(1, NewPtr); + if (SI.getAlignment()) + SI.setAlignment(MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset)); DEBUG(dbgs() << " to: " << SI << "\n"); deleteIfTriviallyDead(OldOp); @@ -2048,6 +2064,15 @@ private: // pointer to the new alloca. if (!isa<Constant>(II.getLength())) { II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType())); + + Type *CstTy = II.getAlignmentCst()->getType(); + if (!NewAI.getAlignment()) + II.setAlignment(ConstantInt::get(CstTy, 0)); + else + II.setAlignment( + ConstantInt::get(CstTy, MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset))); + deleteIfTriviallyDead(OldPtr); return false; } @@ -2067,11 +2092,15 @@ private: !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) { Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); + unsigned Align = 1; + if (NewAI.getAlignment()) + Align = MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset); CallInst *New = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()), - II.getValue(), Size, II.getAlignment(), + II.getValue(), Size, Align, II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); @@ -2109,11 +2138,13 @@ private: // If this is an element-wide memset of a vectorizable alloca, insert it. 
if (VecTy && (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)) { - StoreInst *Store = IRB.CreateStore( - IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V, - getIndex(IRB, BeginOffset), + StoreInst *Store = IRB.CreateAlignedStore( + IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI, + NewAI.getAlignment(), + getName(".load")), + V, getIndex(IRB, BeginOffset), getName(".insert")), - &NewAI); + &NewAI, NewAI.getAlignment()); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return true; @@ -2131,7 +2162,8 @@ private: assert(V->getType() == VecTy); } - Value *New = IRB.CreateStore(V, &NewAI, II.isVolatile()); + Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), + II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return !II.isVolatile(); @@ -2164,6 +2196,13 @@ private: else II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType())); + Type *CstTy = II.getAlignmentCst()->getType(); + if (II.getAlignment() > 1) + II.setAlignment(ConstantInt::get( + CstTy, MinAlign(II.getAlignment(), + MinAlign(NewAI.getAlignment(), + BeginOffset - NewAllocaBeginOffset)))); + DEBUG(dbgs() << " to: " << II << "\n"); deleteIfTriviallyDead(OldOp); return false; @@ -2221,6 +2260,11 @@ private: OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, getName("." + OtherPtr->getName())); + unsigned Align = II.getAlignment(); + if (Align > 1) + Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(), + MinAlign(II.getAlignment(), NewAI.getAlignment())); + // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after rewriting this instruction. if (AllocaInst *AI @@ -2236,8 +2280,7 @@ private: CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, - Size, II.getAlignment(), - II.isVolatile()); + Size, Align, II.isVolatile()); (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return false; @@ -2251,22 +2294,25 @@ private: Value *Src; if (IsVectorElement && !IsDest) { // We have to extract rather than load. - Src = IRB.CreateExtractElement(IRB.CreateLoad(SrcPtr, - getName(".copyload")), - getIndex(IRB, BeginOffset), - getName(".copyextract")); + Src = IRB.CreateExtractElement( + IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), + getIndex(IRB, BeginOffset), + getName(".copyextract")); } else { - Src = IRB.CreateLoad(SrcPtr, II.isVolatile(), getName(".copyload")); + Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), + getName(".copyload")); } if (IsVectorElement && IsDest) { // We have to insert into a loaded copy before storing. - Src = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), - Src, getIndex(IRB, BeginOffset), - getName(".insert")); + Src = IRB.CreateInsertElement( + IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), + Src, getIndex(IRB, BeginOffset), + getName(".insert")); } - Value *Store = IRB.CreateStore(Src, DstPtr, II.isVolatile()); + StoreInst *Store = cast<StoreInst>( + IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile())); (void)Store; DEBUG(dbgs() << " to: " << *Store << "\n"); return !II.isVolatile(); @@ -2460,8 +2506,7 @@ private: else { AllocaPartitioning::PartitionUse OtherUse = *UI; OtherUse.User = Load; - P.use_insert(PI, std::upper_bound(UI, P.use_end(PI), OtherUse), - OtherUse); + P.use_push_back(PI, OtherUse); } } } @@ -2559,7 +2604,7 @@ private: LoadInst *OtherLoad = IsTrueVal ? 
FL : TL; assert(OtherUse.Ptr == OtherLoad->getOperand(0)); OtherUse.User = OtherLoad; - P.use_insert(PI, P.use_end(PI), OtherUse); + P.use_push_back(PI, OtherUse); } // Transfer alignment and TBAA info if present. @@ -2576,8 +2621,6 @@ private: LI->replaceAllUsesWith(V); Pass.DeadInsts.push_back(LI); } - if (PI != P.end()) - std::stable_sort(P.use_begin(PI), P.use_end(PI)); deleteIfTriviallyDead(OldPtr); return NewPtr == &NewAI; @@ -2959,9 +3002,19 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, assert(PI == P.begin() && "Begin offset is zero on later partition"); NewAI = &AI; } else { - // FIXME: The alignment here is overly conservative -- we could in many - // cases get away with much weaker alignment constraints. - NewAI = new AllocaInst(AllocaTy, 0, AI.getAlignment(), + unsigned Alignment = AI.getAlignment(); + if (!Alignment) { + // The minimum alignment which users can rely on when the explicit + // alignment is omitted or zero is that required by the ABI for this + // type. + Alignment = TD->getABITypeAlignment(AI.getAllocatedType()); + } + Alignment = MinAlign(Alignment, PI->BeginOffset); + // If we will get at least this much alignment from the type alone, leave + // the alloca's alignment unconstrained. + if (Alignment <= TD->getABITypeAlignment(AllocaTy)) + Alignment = 0; + NewAI = new AllocaInst(AllocaTy, 0, Alignment, AI.getName() + ".sroa." + Twine(PI - P.begin()), &AI); ++NumNewAllocas; diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 9d630349ab..55227e2714 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -23,11 +23,69 @@ using namespace llvm; +/// Generate code to compute the remainder of two signed integers. Returns the +/// remainder, which will have the sign of the dividend. Builder's insert point +/// should be pointing where the caller wants code generated, e.g. at the srem +/// instruction. This will generate a urem in the process, and Builder's insert +/// point will be pointing at the uren (if present, i.e. not folded), ready to +/// be expanded if the user wishes +static Value *generateSignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + ConstantInt *ThirtyOne = Builder.getInt32(31); + + // ; %dividend_sgn = ashr i32 %dividend, 31 + // ; %divisor_sgn = ashr i32 %divisor, 31 + // ; %dvd_xor = xor i32 %dividend, %dividend_sgn + // ; %dvs_xor = xor i32 %divisor, %divisor_sgn + // ; %u_dividend = sub i32 %dvd_xor, %dividend_sgn + // ; %u_divisor = sub i32 %dvs_xor, %divisor_sgn + // ; %urem = urem i32 %dividend, %divisor + // ; %xored = xor i32 %urem, %dividend_sgn + // ; %srem = sub i32 %xored, %dividend_sgn + Value *DividendSign = Builder.CreateAShr(Dividend, ThirtyOne); + Value *DivisorSign = Builder.CreateAShr(Divisor, ThirtyOne); + Value *DvdXor = Builder.CreateXor(Dividend, DividendSign); + Value *DvsXor = Builder.CreateXor(Divisor, DivisorSign); + Value *UDividend = Builder.CreateSub(DvdXor, DividendSign); + Value *UDivisor = Builder.CreateSub(DvsXor, DivisorSign); + Value *URem = Builder.CreateURem(UDividend, UDivisor); + Value *Xored = Builder.CreateXor(URem, DividendSign); + Value *SRem = Builder.CreateSub(Xored, DividendSign); + + if (Instruction *URemInst = dyn_cast<Instruction>(URem)) + Builder.SetInsertPoint(URemInst); + + return SRem; +} + + +/// Generate code to compute the remainder of two unsigned integers. Returns the +/// remainder. 
Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the urem instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes +static Value *generatedUnsignedRemainderCode(Value *Dividend, Value *Divisor, + IRBuilder<> &Builder) { + // Remainder = Dividend - Quotient*Divisor + + // ; %quotient = udiv i32 %dividend, %divisor + // ; %product = mul i32 %divisor, %quotient + // ; %remainder = sub i32 %dividend, %product + Value *Quotient = Builder.CreateUDiv(Dividend, Divisor); + Value *Product = Builder.CreateMul(Divisor, Quotient); + Value *Remainder = Builder.CreateSub(Dividend, Product); + + if (Instruction *UDiv = dyn_cast<Instruction>(Quotient)) + Builder.SetInsertPoint(UDiv); + + return Remainder; +} + /// Generate code to divide two signed integers. Returns the quotient, rounded -/// towards 0. Builder's insert point should be pointing at the sdiv -/// instruction. This will generate a udiv in the process, and Builder's insert -/// point will be pointing at the udiv (if present, i.e. not folded), ready to -/// be expanded if the user wishes. +/// towards 0. Builder's insert point should be pointing where the caller wants +/// code generated, e.g. at the sdiv instruction. This will generate a udiv in +/// the process, and Builder's insert point will be pointing at the udiv (if +/// present, i.e. not folded), ready to be expanded if the user wishes. static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // Implementation taken from compiler-rt's __divsi3 @@ -62,8 +120,8 @@ static Value *generateSignedDivisionCode(Value *Dividend, Value *Divisor, } /// Generates code to divide two unsigned scalar 32-bit integers. Returns the -/// quotient, rounded towards 0. Builder's insert point should be pointing at -/// the udiv instruction. +/// quotient, rounded towards 0. Builder's insert point should be pointing where +/// the caller wants code generated, e.g. at the udiv instruction. static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, IRBuilder<> &Builder) { // The basic algorithm can be found in the compiler-rt project's @@ -265,6 +323,56 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor, return Q_5; } +/// Generate code to calculate the remainder of two integers, replacing Rem with +/// the generated code. This currently generates code using the udiv expansion, +/// but future work includes generating more specialized code, e.g. when more +/// information about the operands are known. Currently only implements 32bit +/// scalar division (due to udiv's limitation), but future work is removing this +/// limitation. +/// +/// @brief Replace Rem with generated code. 
+bool llvm::expandRemainder(BinaryOperator *Rem) { + assert((Rem->getOpcode() == Instruction::SRem || + Rem->getOpcode() == Instruction::URem) && + "Trying to expand remainder from a non-remainder function"); + + IRBuilder<> Builder(Rem); + + // First prepare the sign if it's a signed remainder + if (Rem->getOpcode() == Instruction::SRem) { + Value *Remainder = generateSignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // If we didn't actually generate a udiv instruction, we're done + BinaryOperator *BO = dyn_cast<BinaryOperator>(Builder.GetInsertPoint()); + if (!BO || BO->getOpcode() != Instruction::URem) + return true; + + Rem = BO; + } + + Value *Remainder = generatedUnsignedRemainderCode(Rem->getOperand(0), + Rem->getOperand(1), + Builder); + + Rem->replaceAllUsesWith(Remainder); + Rem->dropAllReferences(); + Rem->eraseFromParent(); + + // Expand the udiv + if (BinaryOperator *UDiv = dyn_cast<BinaryOperator>(Builder.GetInsertPoint())) { + assert(UDiv->getOpcode() == Instruction::UDiv && "Non-udiv in expansion?"); + expandDivision(UDiv); + } + + return true; +} + + /// Generate code to divide two integers, replacing Div with the generated /// code. This currently generates code similarly to compiler-rt's /// implementations, but future work includes generating more specialized code @@ -287,7 +395,7 @@ bool llvm::expandDivision(BinaryOperator *Div) { if (Div->getOpcode() == Instruction::SDiv) { // Lower the code to unsigned division, and reset Div to point to the udiv. Value *Quotient = generateSignedDivisionCode(Div->getOperand(0), - Div->getOperand(1), Builder); + Div->getOperand(1), Builder); Div->replaceAllUsesWith(Quotient); Div->dropAllReferences(); Div->eraseFromParent(); diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 876ff2c337..065325b7c2 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -58,9 +58,10 @@ static cl::opt<bool> SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block")); -STATISTIC(NumSpeculations, "Number of speculative executed instructions"); +STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables"); STATISTIC(NumSinkCommons, "Number of common instructions sunk down to the end block"); +STATISTIC(NumSpeculations, "Number of speculative executed instructions"); namespace { /// ValueEqualityComparisonCase - Represents a case of a switch. @@ -3240,83 +3241,227 @@ static bool GetCaseResults(SwitchInst *SI, return true; } -/// BuildLookupTable - Build a lookup table with the contents of Results, using -/// DefaultResult to fill the holes in the table. If the table ends up -/// containing the same result in each element, set *SingleResult to that value -/// and return NULL. -static GlobalVariable *BuildLookupTable(Module &M, - uint64_t TableSize, - ConstantInt *Offset, - const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Results, - Constant *DefaultResult, - Constant **SingleResult) { - assert(Results.size() && "Need values to build lookup table"); - assert(TableSize >= Results.size() && "Table needs to hold all values"); +namespace { + /// SwitchLookupTable - This class represents a lookup table that can be used + /// to replace a switch. 
+ class SwitchLookupTable { + public: + /// SwitchLookupTable - Create a lookup table to use as a switch replacement + /// with the contents of Values, using DefaultValue to fill any holes in the + /// table. + SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const TargetData *TD); + + /// BuildLookup - Build instructions with Builder to retrieve the value at + /// the position given by Index in the lookup table. + Value *BuildLookup(Value *Index, IRBuilder<> &Builder); + + /// WouldFitInRegister - Return true if a table with TableSize elements of + /// type ElementType would fit in a target-legal register. + static bool WouldFitInRegister(const TargetData *TD, + uint64_t TableSize, + const Type *ElementType); + + private: + // Depending on the contents of the table, it can be represented in + // different ways. + enum { + // For tables where each element contains the same value, we just have to + // store that single value and return it for each lookup. + SingleValueKind, + + // For small tables with integer elements, we can pack them into a bitmap + // that fits into a target-legal register. Values are retrieved by + // shift and mask operations. + BitMapKind, + + // The table is stored as an array of values. Values are retrieved by load + // instructions from the table. + ArrayKind + } Kind; + + // For SingleValueKind, this is the single value. + Constant *SingleValue; + + // For BitMapKind, this is the bitmap. + ConstantInt *BitMap; + IntegerType *BitMapElementTy; + + // For ArrayKind, this is the array. + GlobalVariable *Array; + }; +} + +SwitchLookupTable::SwitchLookupTable(Module &M, + uint64_t TableSize, + ConstantInt *Offset, + const SmallVector<std::pair<ConstantInt*, Constant*>, 4>& Values, + Constant *DefaultValue, + const TargetData *TD) { + assert(Values.size() && "Can't build lookup table without values!"); + assert(TableSize >= Values.size() && "Can't fit values in table!"); // If all values in the table are equal, this is that value. - Constant *SameResult = Results.begin()->second; + SingleValue = Values.begin()->second; // Build up the table contents. - std::vector<Constant*> TableContents(TableSize); - for (size_t I = 0, E = Results.size(); I != E; ++I) { - ConstantInt *CaseVal = Results[I].first; - Constant *CaseRes = Results[I].second; - - uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue(); + SmallVector<Constant*, 64> TableContents(TableSize); + for (size_t I = 0, E = Values.size(); I != E; ++I) { + ConstantInt *CaseVal = Values[I].first; + Constant *CaseRes = Values[I].second; + assert(CaseRes->getType() == DefaultValue->getType()); + + uint64_t Idx = (CaseVal->getValue() - Offset->getValue()) + .getLimitedValue(); TableContents[Idx] = CaseRes; - if (CaseRes != SameResult) - SameResult = NULL; + if (CaseRes != SingleValue) + SingleValue = NULL; } // Fill in any holes in the table with the default result. - if (Results.size() < TableSize) { - for (unsigned i = 0; i < TableSize; ++i) { - if (!TableContents[i]) - TableContents[i] = DefaultResult; + if (Values.size() < TableSize) { + for (uint64_t I = 0; I < TableSize; ++I) { + if (!TableContents[I]) + TableContents[I] = DefaultValue; } - if (DefaultResult != SameResult) - SameResult = NULL; + if (DefaultValue != SingleValue) + SingleValue = NULL; + } + + // If each element in the table contains the same value, we only need to store + // that single value. 
+ if (SingleValue) { + Kind = SingleValueKind; + return; } - // Same result was used in the entire table; just return that. - if (SameResult) { - *SingleResult = SameResult; - return NULL; + // If the type is integer and the table fits in a register, build a bitmap. + if (WouldFitInRegister(TD, TableSize, DefaultValue->getType())) { + IntegerType *IT = cast<IntegerType>(DefaultValue->getType()); + APInt TableInt(TableSize * IT->getBitWidth(), 0); + for (uint64_t I = TableSize; I > 0; --I) { + TableInt <<= IT->getBitWidth(); + ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]); + TableInt |= Val->getValue().zext(TableInt.getBitWidth()); + } + BitMap = ConstantInt::get(M.getContext(), TableInt); + BitMapElementTy = IT; + Kind = BitMapKind; + ++NumBitMaps; + return; } - ArrayType *ArrayTy = ArrayType::get(DefaultResult->getType(), TableSize); + // Store the table in an array. + ArrayType *ArrayTy = ArrayType::get(DefaultValue->getType(), TableSize); Constant *Initializer = ConstantArray::get(ArrayTy, TableContents); - GlobalVariable *GV = new GlobalVariable(M, ArrayTy, /*constant=*/ true, - GlobalVariable::PrivateLinkage, - Initializer, - "switch.table"); - GV->setUnnamedAddr(true); - return GV; + Array = new GlobalVariable(M, ArrayTy, /*constant=*/ true, + GlobalVariable::PrivateLinkage, + Initializer, + "switch.table"); + Array->setUnnamedAddr(true); + Kind = ArrayKind; +} + +Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { + switch (Kind) { + case SingleValueKind: + return SingleValue; + case BitMapKind: { + // Type of the bitmap (e.g. i59). + IntegerType *MapTy = BitMap->getType(); + + // Cast Index to the same type as the bitmap. + // Note: The Index is <= the number of elements in the table, so + // truncating it to the width of the bitmask is safe. + Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast"); + + // Multiply the shift amount by the element width. + ShiftAmt = Builder.CreateMul(ShiftAmt, + ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()), + "switch.shiftamt"); + + // Shift down. + Value *DownShifted = Builder.CreateLShr(BitMap, ShiftAmt, + "switch.downshift"); + // Mask off. + return Builder.CreateTrunc(DownShifted, BitMapElementTy, + "switch.masked"); + } + case ArrayKind: { + Value *GEPIndices[] = { Builder.getInt32(0), Index }; + Value *GEP = Builder.CreateInBoundsGEP(Array, GEPIndices, + "switch.gep"); + return Builder.CreateLoad(GEP, "switch.load"); + } + } + llvm_unreachable("Unknown lookup table kind!"); +} + +bool SwitchLookupTable::WouldFitInRegister(const TargetData *TD, + uint64_t TableSize, + const Type *ElementType) { + if (!TD) + return false; + const IntegerType *IT = dyn_cast<IntegerType>(ElementType); + if (!IT) + return false; + // FIXME: If the type is wider than it needs to be, e.g. i8 but all values + // are <= 15, we could try to narrow the type. + + // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. + if (TableSize >= UINT_MAX/IT->getBitWidth()) + return false; + return TD->fitsInLegalInteger(TableSize * IT->getBitWidth()); +} + +/// ShouldBuildLookupTable - Determine whether a lookup table should be built +/// for this switch, based on the number of caes, size of the table and the +/// types of the results. +static bool ShouldBuildLookupTable(SwitchInst *SI, + uint64_t TableSize, + const TargetData *TD, + const SmallDenseMap<PHINode*, Type*>& ResultTypes) { + // The table density should be at least 40%. 
This is the same criterion as for + // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. + // FIXME: Find the best cut-off. + if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10) + return false; // TableSize overflowed, or mul below might overflow. + if (SI->getNumCases() * 10 >= TableSize * 4) + return true; + + // If each table would fit in a register, we should build it anyway. + for (SmallDenseMap<PHINode*, Type*>::const_iterator I = ResultTypes.begin(), + E = ResultTypes.end(); I != E; ++I) { + if (!SwitchLookupTable::WouldFitInRegister(TD, TableSize, I->second)) + return false; + } + return true; } /// SwitchToLookupTable - If the switch is only used to initialize one or more /// phi nodes in a common successor block with different constant values, /// replace the switch with lookup tables. static bool SwitchToLookupTable(SwitchInst *SI, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, + const TargetData* TD) { assert(SI->getNumCases() > 1 && "Degenerate switch?"); // FIXME: Handle unreachable cases. // FIXME: If the switch is too sparse for a lookup table, perhaps we could // split off a dense part and build a lookup table for that. - // FIXME: If the results are all integers and the lookup table would fit in a - // target-legal register, we should store them as a bitmap and use shift/mask - // to look up the result. - // FIXME: This creates arrays of GEPs to constant strings, which means each // GEP needs a runtime relocation in PIC code. We should just build one big // string and lookup indices into that. - // Ignore the switch if the number of cases are too small. + // Ignore the switch if the number of cases is too small. // This is similar to the check when building jump tables in // SelectionDAGBuilder::handleJTSwitchCase. // FIXME: Determine the best cut-off. @@ -3370,33 +3515,12 @@ static bool SwitchToLookupTable(SwitchInst *SI, } APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue(); - // The table density should be at lest 40%. This is the same criterion as for - // jump tables, see SelectionDAGBuilder::handleJTSwitchCase. - // FIXME: Find the best cut-off. - // Be careful to avoid overlow in the density computation. - if (RangeSpread.zextOrSelf(64).ugt(UINT64_MAX / 4 - 1)) - return false; uint64_t TableSize = RangeSpread.getLimitedValue() + 1; - if (SI->getNumCases() * 10 < TableSize * 4) + if (!ShouldBuildLookupTable(SI, TableSize, TD, ResultTypes)) return false; - // Build the lookup tables. - SmallDenseMap<PHINode*, GlobalVariable*> LookupTables; - SmallDenseMap<PHINode*, Constant*> SingleResults; - - Module &Mod = *CommonDest->getParent()->getParent(); - for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); - I != E; ++I) { - PHINode *PHI = *I; - - Constant *SingleResult = NULL; - LookupTables[PHI] = BuildLookupTable(Mod, TableSize, MinCaseVal, - ResultLists[PHI], DefaultResults[PHI], - &SingleResult); - SingleResults[PHI] = SingleResult; - } - // Create the BB that does the lookups. + Module &Mod = *CommonDest->getParent()->getParent(); BasicBlock *LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup", CommonDest->getParent(), @@ -3414,19 +3538,13 @@ static bool SwitchToLookupTable(SwitchInst *SI, // Populate the BB that does the lookups. Builder.SetInsertPoint(LookupBB); bool ReturnedEarly = false; - for (SmallVector<PHINode*, 4>::iterator I = PHIs.begin(), E = PHIs.end(); - I != E; ++I) { - PHINode *PHI = *I; - // There was a single result for this phi; just use that. 
- if (Constant *SingleResult = SingleResults[PHI]) { - PHI->addIncoming(SingleResult, LookupBB); - continue; - } + for (size_t I = 0, E = PHIs.size(); I != E; ++I) { + PHINode *PHI = PHIs[I]; + + SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultLists[PHI], + DefaultResults[PHI], TD); - Value *GEPIndices[] = { Builder.getInt32(0), TableIndex }; - Value *GEP = Builder.CreateInBoundsGEP(LookupTables[PHI], GEPIndices, - "switch.gep"); - Value *Result = Builder.CreateLoad(GEP, "switch.load"); + Value *Result = Table.BuildLookup(TableIndex, Builder); // If the result is used to return immediately from the function, we want to // do that right here. @@ -3494,7 +3612,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (ForwardSwitchConditionToPHI(SI)) return SimplifyCFG(BB) | true; - if (SwitchToLookupTable(SI, Builder)) + if (SwitchToLookupTable(SI, Builder, TD)) return SimplifyCFG(BB) | true; return false; diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index fc2538db64..a30b09321b 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -21,7 +21,7 @@ using namespace llvm; // Out of line method to get vtable etc for class. -void ValueMapTypeRemapper::Anchor() {} +void ValueMapTypeRemapper::anchor() {} Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper) { |
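For the new InstCombineSelect.cpp case that turns a select with a constant vector condition into a shufflevector, here is a minimal standalone C++ sketch of the mask construction. The array model and names are illustrative stand-ins for the IR values in the patch; lane i reads from the true operand when the condition element is nonzero, otherwise from the false operand at index VWidth + i.

    #include <array>
    #include <cassert>
    #include <cstddef>

    // Build the shufflevector mask for "select <N x i1> Cond, True, False":
    // lane I gets index I (true operand) or N + I (false operand) into the
    // concatenated <True, False> vector, matching the
    // i + (isZero ? VWidth : 0) computation in the patch.
    template <std::size_t N>
    std::array<int, N> buildShuffleMask(const std::array<bool, N> &Cond) {
      std::array<int, N> Mask{};
      for (std::size_t I = 0; I != N; ++I)
        Mask[I] = static_cast<int>(Cond[I] ? I : N + I);
      return Mask;
    }

    int main() {
      std::array<bool, 4> Cond = {true, false, true, false};
      int Concat[8] = {10, 11, 12, 13, 20, 21, 22, 23}; // <True, False> lanes
      std::array<int, 4> Mask = buildShuffleMask(Cond);
      int Expected[4] = {10, 21, 12, 23}; // true lanes from True, false from False
      for (std::size_t I = 0; I != 4; ++I)
        assert(Concat[Mask[I]] == Expected[I]);
      return 0;
    }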
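The SROA.cpp changes thread explicit alignments onto the loads, stores and memory intrinsics they rewrite, taking MinAlign of the new alloca's alignment and the byte offset into it. A small sketch of that computation follows; the function name is mine, and the bit trick simply isolates the lowest set bit of the two values combined, i.e. the largest power of two dividing both.

    #include <cassert>
    #include <cstdint>

    // The alignment that can be guaranteed Offset bytes into an object whose
    // start is A-byte aligned is the largest power of two dividing both
    // values. This models what llvm::MinAlign computes, which the SROA
    // rewriter uses when it moves accesses onto a new partition alloca.
    uint64_t minAlign(uint64_t A, uint64_t Offset) {
      uint64_t Both = A | Offset;
      return Both & (~Both + 1); // isolate the lowest set bit
    }

    int main() {
      assert(minAlign(16, 0) == 16); // at the start, full alignment survives
      assert(minAlign(16, 4) == 4);  // 4 bytes in, only 4-byte alignment remains
      assert(minAlign(8, 12) == 4);
      return 0;
    }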
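The new IntegerDivision.cpp helpers expand srem in terms of urem, and urem in terms of udiv. A self-contained C++ sketch of the two identities is below; the names are illustrative, and like the generated IR the signed version leans on arithmetic right shift and two's-complement wraparound.

    #include <cassert>
    #include <cstdint>

    // srem lowered onto urem: strip the signs, take the unsigned remainder,
    // then give the result the sign of the dividend (the identity behind
    // generateSignedRemainderCode).
    int32_t sremViaUrem(int32_t Dividend, int32_t Divisor) {
      uint32_t DividendSign = (uint32_t)(Dividend >> 31); // ashr: all-ones or zero
      uint32_t DivisorSign  = (uint32_t)(Divisor >> 31);
      uint32_t UDividend = ((uint32_t)Dividend ^ DividendSign) - DividendSign;
      uint32_t UDivisor  = ((uint32_t)Divisor ^ DivisorSign) - DivisorSign;
      uint32_t URem = UDividend % UDivisor;
      return (int32_t)((URem ^ DividendSign) - DividendSign);
    }

    // urem lowered onto udiv: remainder = dividend - (dividend / divisor) * divisor
    // (the identity behind generatedUnsignedRemainderCode).
    uint32_t uremViaUdiv(uint32_t Dividend, uint32_t Divisor) {
      uint32_t Quotient = Dividend / Divisor;
      return Dividend - Quotient * Divisor;
    }

    int main() {
      assert(sremViaUrem(7, 3) == 1 && sremViaUrem(-7, 3) == -1);
      assert(sremViaUrem(7, -3) == 1 && sremViaUrem(-7, -3) == -1);
      assert(uremViaUdiv(29, 5) == 4);
      return 0;
    }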
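The new BitMapKind in SimplifyCFG's SwitchLookupTable packs small integer results into a single target-legal word and retrieves an element with a shift and mask instead of loading from a global array. A standalone sketch of that retrieval; the constants and names here are illustrative, not taken from the patch.

    #include <cassert>
    #include <cstdint>

    // Results for switch cases 0..3; each value fits in 4 bits, so the whole
    // table fits in one 64-bit word. Element I occupies bits [4*I, 4*I + 4).
    constexpr uint64_t kElemBits = 4;
    constexpr uint64_t kTable =
        (0x7ULL << 0) | (0x3ULL << 4) | (0x9ULL << 8) | (0x1ULL << 12);

    // Mirror the BuildLookup sequence for BitMapKind: multiply the index by
    // the element width, shift the packed word down, and mask off one element.
    uint64_t lookup(uint64_t Index) {
      return (kTable >> (Index * kElemBits)) & ((1ULL << kElemBits) - 1);
    }

    int main() {
      assert(lookup(0) == 7 && lookup(1) == 3 && lookup(2) == 9 && lookup(3) == 1);
      return 0;
    }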
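ShouldBuildLookupTable keeps the roughly 40% density threshold that jump-table formation uses, with the escape hatch that a table is built anyway when every result table would fit in a register. The core density check, modeled in plain C++ with an illustrative name:

    #include <cassert>
    #include <cstdint>

    // Build a lookup table when at least 40% of the TableSize slots are
    // populated by real cases: NumCases * 10 >= TableSize * 4, as in the patch.
    bool denseEnough(uint64_t NumCases, uint64_t TableSize) {
      return NumCases * 10 >= TableSize * 4;
    }

    int main() {
      assert(denseEnough(4, 10));  // exactly 40%: dense enough
      assert(!denseEnough(3, 10)); // 30%: too sparse
      return 0;
    }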