author     Derek Schuff <dschuff@chromium.org>    2012-10-01 11:20:30 -0700
committer  Derek Schuff <dschuff@chromium.org>    2012-10-01 11:20:30 -0700
commit     b3423dd295c69f78bd731f1ad65ad90ce3efa36f (patch)
tree       0e872df2f0333ed1806d9e0a6906b2f5ebd58512 /lib/Transforms/Scalar
parent     a27c28b1427dc2082ab2b31efdbb25f9fde31b61 (diff)
parent     72f0976c1b91c7ba50dce4d0ad0289dc14d37f81 (diff)
Merge commit '72f0976c1b91c7ba50dce4d0ad0289dc14d37f81'
Conflicts:
	lib/Target/ARM/ARMISelDAGToDAG.cpp
	lib/Target/Mips/MipsISelLowering.cpp
	lib/Target/Mips/MipsSubtarget.cpp
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp               7
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp   5
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp       126
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp               2
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp                 2
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp                       161
6 files changed, 200 insertions, 103 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 495cdc6321..305d70f27b 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -149,7 +149,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   TLInfo = &getAnalysis<TargetLibraryInfo>();
   DT = getAnalysisIfAvailable<DominatorTree>();
   PFI = getAnalysisIfAvailable<ProfileInfo>();
-  OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
+  OptSize = F.getFnAttributes().hasOptimizeForSizeAttr();

   /// This optimization identifies DIV instructions that can be
   /// profitably bypassed and carried out with a shorter, faster divide.
@@ -226,7 +226,8 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
     // edge, just collapse it.
     BasicBlock *SinglePred = BB->getSinglePredecessor();

-    if (!SinglePred || SinglePred == BB) continue;
+    // Don't merge if BB's address is taken.
+    if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;

     BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
     if (Term && !Term->isConditional()) {
@@ -788,7 +789,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
   }

   // If we eliminated all predecessors of the block, delete the block now.
-  if (Changed && pred_begin(BB) == pred_end(BB))
+  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
     BB->eraseFromParent();

   return Changed;
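Both CodeGenPrepare hunks above gate block folding on BasicBlock::hasAddressTaken(): a block whose address escapes through a blockaddress constant may be the target of an indirect branch, so it must not be merged into its predecessor or erased even when the visible CFG says that would be safe. A minimal sketch of the resulting test, written against the LLVM 3.2-era API the patch uses (the helper name and exact headers are assumptions, not part of the patch):

    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"

    // Sketch: may BB be folded into its lone unconditional predecessor?
    static bool mayFoldIntoPredecessor(llvm::BasicBlock *BB) {
      llvm::BasicBlock *SinglePred = BB->getSinglePredecessor();
      // A blockaddress() user can reach BB through indirectbr, so the edge
      // set is not fully visible in the CFG; never fold such a block.
      if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
        return false;
      llvm::BranchInst *Term =
          llvm::dyn_cast<llvm::BranchInst>(SinglePred->getTerminator());
      return Term && !Term->isConditional();
    }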
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 9b0aadb0b5..3ec6f3dcc3 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -235,6 +235,11 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
         // This case never fires - remove it.
         CI.getCaseSuccessor()->removePredecessor(BB);
         SI->removeCase(CI); // Does not invalidate the iterator.
+
+        // The condition can be modified by removePredecessor's PHI simplification
+        // logic.
+        Cond = SI->getCondition();
+
         ++NumDeadCases;
         Changed = true;
       } else if (State == LazyValueInfo::True) {
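The CorrelatedValuePropagation hunk is a stale-pointer fix: SwitchInst::removeCase leaves the case iterator usable, but removePredecessor can simplify PHI nodes in the dead successor, and that simplification may replace the very value the switch condition was derived from, leaving any cached Cond pointer stale. A condensed sketch of the loop shape (LLVM 3.2-era API; the dead-case test is stubbed out because the real pass queries LazyValueInfo):

    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"

    static bool pruneDeadCases(llvm::SwitchInst *SI,
                               bool (*caseIsDead)(llvm::ConstantInt *)) {
      llvm::BasicBlock *BB = SI->getParent();
      llvm::Value *Cond = SI->getCondition();
      bool Changed = false;
      for (llvm::SwitchInst::CaseIt CI = SI->case_begin();
           CI != SI->case_end(); /* advanced in the else branch */) {
        if (caseIsDead(CI.getCaseValue())) {
          CI.getCaseSuccessor()->removePredecessor(BB);
          SI->removeCase(CI);        // does not invalidate CI...
          Cond = SI->getCondition(); // ...but may have replaced the condition
          Changed = true;
        } else {
          ++CI;
        }
      }
      (void)Cond; // the real pass keeps using Cond for later LVI queries
      return Changed;
    }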
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 1ff4329c84..301ee2f663 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/ADT/SetVector.h"
@@ -45,6 +46,7 @@ namespace {
     AliasAnalysis *AA;
     MemoryDependenceAnalysis *MD;
     DominatorTree *DT;
+    const TargetLibraryInfo *TLI;

     static char ID; // Pass identification, replacement for typeid
     DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) {
@@ -55,6 +57,7 @@ namespace {
       AA = &getAnalysis<AliasAnalysis>();
       MD = &getAnalysis<MemoryDependenceAnalysis>();
       DT = &getAnalysis<DominatorTree>();
+      TLI = AA->getTargetLibraryInfo();

       bool Changed = false;
       for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
@@ -144,7 +147,7 @@ static void DeleteDeadInstruction(Instruction *I,

 /// hasMemoryWrite - Does this instruction write some memory? This only returns
 /// true for things that we can analyze with other helpers below.
-static bool hasMemoryWrite(Instruction *I) {
+static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
   if (isa<StoreInst>(I))
     return true;
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -159,6 +162,26 @@ static bool hasMemoryWrite(Instruction *I) {
       return true;
     }
   }
+  if (CallSite CS = I) {
+    if (Function *F = CS.getCalledFunction()) {
+      if (TLI && TLI->has(LibFunc::strcpy) &&
+          F->getName() == TLI->getName(LibFunc::strcpy)) {
+        return true;
+      }
+      if (TLI && TLI->has(LibFunc::strncpy) &&
+          F->getName() == TLI->getName(LibFunc::strncpy)) {
+        return true;
+      }
+      if (TLI && TLI->has(LibFunc::strcat) &&
+          F->getName() == TLI->getName(LibFunc::strcat)) {
+        return true;
+      }
+      if (TLI && TLI->has(LibFunc::strncat) &&
+          F->getName() == TLI->getName(LibFunc::strncat)) {
+        return true;
+      }
+    }
+  }
   return false;
 }

@@ -206,7 +229,8 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
 /// instruction if any.
 static AliasAnalysis::Location
 getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
-  assert(hasMemoryWrite(Inst) && "Unknown instruction case");
+  assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) &&
+         "Unknown instruction case");

   // The only instructions that both read and write are the mem transfer
   // instructions (memcpy/memmove).
@@ -223,23 +247,29 @@ static bool isRemovable(Instruction *I) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->isUnordered();

-  IntrinsicInst *II = cast<IntrinsicInst>(I);
-  switch (II->getIntrinsicID()) {
-  default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
-  case Intrinsic::lifetime_end:
-    // Never remove dead lifetime_end's, e.g. because it is followed by a
-    // free.
-    return false;
-  case Intrinsic::init_trampoline:
-    // Always safe to remove init_trampoline.
-    return true;
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
+    case Intrinsic::lifetime_end:
+      // Never remove dead lifetime_end's, e.g. because it is followed by a
+      // free.
+      return false;
+    case Intrinsic::init_trampoline:
+      // Always safe to remove init_trampoline.
+      return true;

-  case Intrinsic::memset:
-  case Intrinsic::memmove:
-  case Intrinsic::memcpy:
-    // Don't remove volatile memory intrinsics.
-    return !cast<MemIntrinsic>(II)->isVolatile();
+    case Intrinsic::memset:
+    case Intrinsic::memmove:
+    case Intrinsic::memcpy:
+      // Don't remove volatile memory intrinsics.
+      return !cast<MemIntrinsic>(II)->isVolatile();
+    }
   }
+
+  if (CallSite CS = I)
+    return CS.getInstruction()->use_empty();
+
+  return false;
 }

@@ -250,14 +280,19 @@ static bool isShortenable(Instruction *I) {
   if (isa<StoreInst>(I))
     return false;

-  IntrinsicInst *II = cast<IntrinsicInst>(I);
-  switch (II->getIntrinsicID()) {
-    default: return false;
-    case Intrinsic::memset:
-    case Intrinsic::memcpy:
-      // Do shorten memory intrinsics.
-      return true;
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+      default: return false;
+      case Intrinsic::memset:
+      case Intrinsic::memcpy:
+        // Do shorten memory intrinsics.
+        return true;
+    }
   }
+
+  // Don't shorten libcalls calls for now.
+
+  return false;
 }

 /// getStoredPointerOperand - Return the pointer that is being written to.
@@ -267,12 +302,18 @@ static Value *getStoredPointerOperand(Instruction *I) {
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
     return MI->getDest();

-  IntrinsicInst *II = cast<IntrinsicInst>(I);
-  switch (II->getIntrinsicID()) {
-  default: llvm_unreachable("Unexpected intrinsic!");
-  case Intrinsic::init_trampoline:
-    return II->getArgOperand(0);
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default: llvm_unreachable("Unexpected intrinsic!");
+    case Intrinsic::init_trampoline:
+      return II->getArgOperand(0);
+    }
   }
+
+  CallSite CS = I;
+  // All the supported functions so far happen to have dest as their first
+  // argument.
+  return CS.getArgument(0);
 }

 static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
@@ -455,13 +496,13 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
     Instruction *Inst = BBI++;

     // Handle 'free' calls specially.
-    if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
+    if (CallInst *F = isFreeCall(Inst, TLI)) {
       MadeChange |= HandleFree(F);
       continue;
     }

     // If we find something that writes memory, get its memory dependence.
-    if (!hasMemoryWrite(Inst))
+    if (!hasMemoryWrite(Inst, TLI))
       continue;

     MemDepResult InstDep = MD->getDependency(Inst);
@@ -484,7 +525,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
           // in case we need it.
           WeakVH NextInst(BBI);

-          DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo());
+          DeleteDeadInstruction(SI, *MD, TLI);

           if (NextInst == 0)  // Next instruction deleted.
             BBI = BB.begin();
@@ -531,7 +572,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
               << *DepWrite << "\n KILLER: " << *Inst << '\n');

         // Delete the store and now-dead instructions that feed it.
-        DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo());
+        DeleteDeadInstruction(DepWrite, *MD, TLI);
         ++NumFastStores;
         MadeChange = true;

@@ -628,7 +669,7 @@ bool DSE::HandleFree(CallInst *F) {
     MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB);
     while (Dep.isDef() || Dep.isClobber()) {
       Instruction *Dependency = Dep.getInst();
-      if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
+      if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency))
         break;

       Value *DepPointer =
@@ -641,7 +682,7 @@ bool DSE::HandleFree(CallInst *F) {
       Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));

       // DCE instructions only used to calculate that store
-      DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo());
+      DeleteDeadInstruction(Dependency, *MD, TLI);
       ++NumFastStores;
       MadeChange = true;

@@ -681,8 +722,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {

     // Okay, so these are dead heap objects, but if the pointer never escapes
     // then it's leaked by this function anyways.
-    else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) &&
-             !PointerMayBeCaptured(I, true, true))
+    else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true))
       DeadStackObjects.insert(I);
   }

@@ -698,7 +738,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
     --BBI;

     // If we find a store, check to see if it points into a dead stack value.
-    if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
+    if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
      // See through pointer-to-pointer bitcasts
       SmallVector<Value *, 4> Pointers;
       GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);
@@ -726,8 +766,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
               dbgs() << '\n');

         // DCE instructions only used to calculate that store.
-        DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(),
-                              &DeadStackObjects);
+        DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects);
         ++NumFastStores;
         MadeChange = true;
         continue;
@@ -735,10 +774,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
     }

     // Remove any dead non-memory-mutating instructions.
-    if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) {
+    if (isInstructionTriviallyDead(BBI, TLI)) {
       Instruction *Inst = BBI++;
-      DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(),
-                            &DeadStackObjects);
+      DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects);
       ++NumFastOther;
       MadeChange = true;
       continue;
@@ -754,7 +792,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
     if (CallSite CS = cast<Value>(BBI)) {
       // Remove allocation function calls from the list of dead stack objects;
       // there can't be any references before the definition.
-      if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo()))
+      if (isAllocLikeFn(BBI, TLI))
         DeadStackObjects.remove(BBI);

       // If this call does not access memory, it can't be loading any of our
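Net effect of the DeadStoreElimination changes: the pass caches a TargetLibraryInfo pointer and uses it to recognize calls to strcpy, strncpy, strcat and strncat as analyzable memory writes. Such a call is removable only when its result is unused (CS.getInstruction()->use_empty()), is never shortened, and its written location is taken to be its first argument. A string-copy libcall whose destination is completely overwritten before any read can therefore be deleted like an ordinary dead store; a C++ sketch of the pattern this enables (whether the call is actually removed still depends on the usual dependence and aliasing checks):

    #include <cstring>

    void rewriteBuffer(char *Dst) {
      char Buf[16];
      std::strcpy(Buf, "scratch"); // dead: fully overwritten below before
                                   // any read, and the result is unused
      std::strcpy(Buf, "payload");
      std::strcpy(Dst, Buf);
    }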
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 09a186f7f9..f8709a537f 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -145,7 +145,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   // not user specified.
   unsigned Threshold = CurrentThreshold;
   if (!UserThreshold &&
-      Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
+      Header->getParent()->getFnAttributes().hasOptimizeForSizeAttr())
     Threshold = OptSizeUnrollThreshold;

   // Find trip count and trip multiple if count is not available
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 58f7739888..74c8f43ec2 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -638,7 +638,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
   // Check to see if it would be profitable to unswitch current loop.

   // Do not do non-trivial unswitch while optimizing for size.
-  if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize))
+  if (OptimizeForSize || F->getFnAttributes().hasOptimizeForSizeAttr())
     return false;

   UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
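The two loop-pass hunks above are mechanical updates for the attribute rework being merged from upstream: the optimize-for-size query moves from a bit test on Function to an accessor on the function's attribute object. A one-line wrapper showing both spellings (the helper name is hypothetical; the new accessor is taken from the patch itself):

    #include "llvm/Attributes.h"
    #include "llvm/Function.h"

    static bool optimizingForSize(const llvm::Function &F) {
      // Old spelling: F.hasFnAttr(llvm::Attribute::OptimizeForSize)
      return F.getFnAttributes().hasOptimizeForSizeAttr();
    }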
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index e3182d319c..a8dc0533bf 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -202,11 +202,11 @@ public:
   use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); }
   use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); }
   use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); }
-  void use_insert(unsigned Idx, use_iterator UI, const PartitionUse &U) {
-    Uses[Idx].insert(UI, U);
+  void use_push_back(unsigned Idx, const PartitionUse &U) {
+    Uses[Idx].push_back(U);
   }
-  void use_insert(const_iterator I, use_iterator UI, const PartitionUse &U) {
-    Uses[I - begin()].insert(UI, U);
+  void use_push_back(const_iterator I, const PartitionUse &U) {
+    Uses[I - begin()].push_back(U);
   }
   void use_erase(unsigned Idx, use_iterator UI) { Uses[Idx].erase(UI); }
   void use_erase(const_iterator I, use_iterator UI) {
@@ -522,8 +522,10 @@ private:
   void insertUse(Instruction &I, int64_t Offset, uint64_t Size,
                  bool IsSplittable = false) {
-    // Completely skip uses which don't overlap the allocation.
-    if ((Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
+    // Completely skip uses which have a zero size or don't overlap the
+    // allocation.
+    if (Size == 0 ||
+        (Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
         (Offset < 0 && (uint64_t)-Offset >= Size)) {
       DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
                    << " which starts past the end of the " << AllocSize
@@ -660,11 +662,14 @@ private:
     bool Inserted = false;
     llvm::tie(PMI, Inserted)
       = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx));
-    if (!Inserted && Offsets.IsSplittable) {
+    if (Offsets.IsSplittable &&
+        (!Inserted || II.getRawSource() == II.getRawDest())) {
       // We've found a memory transfer intrinsic which refers to the alloca as
-      // both a source and dest. We refuse to split these to simplify splitting
-      // logic. If possible, SROA will still split them into separate allocas
-      // and then re-analyze.
+      // both a source and dest. This is detected either by direct equality of
+      // the operand values, or when we visit the intrinsic twice due to two
+      // different chains of values leading to it. We refuse to split these to
+      // simplify splitting logic. If possible, SROA will still split them into
+      // separate allocas and then re-analyze.
       Offsets.IsSplittable = false;
       P.Partitions[PMI->second].IsSplittable = false;
       P.Partitions[NewIdx].IsSplittable = false;
@@ -697,6 +702,9 @@ private:
     SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
     Visited.insert(Root);
     Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
+    // If there are no loads or stores, the access is dead. We mark that as
+    // a size zero access.
+    Size = 0;
     do {
       Instruction *I, *UsedI;
       llvm::tie(UsedI, I) = Uses.pop_back_val();
@@ -824,9 +832,9 @@ private:
   }

   void insertUse(Instruction &User, int64_t Offset, uint64_t Size) {
-    // If the use extends outside of the allocation, record it as a dead use
-    // for elimination later.
-    if ((uint64_t)Offset >= AllocSize ||
+    // If the use has a zero size or extends outside of the allocation, record
+    // it as a dead use for elimination later.
+    if (Size == 0 || (uint64_t)Offset >= AllocSize ||
         (Offset < 0 && (uint64_t)-Offset >= Size))
       return markAsDead(User);
@@ -853,7 +861,7 @@ private:
     PartitionUse NewUse(std::max(I->BeginOffset, BeginOffset),
                         std::min(I->EndOffset, EndOffset),
                         &User, cast<Instruction>(*U));
-    P.Uses[I - P.begin()].push_back(NewUse);
+    P.use_push_back(I, NewUse);
     if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
       P.PHIOrSelectOpMap[std::make_pair(&User, U->get())]
         = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1);
@@ -1102,8 +1110,6 @@ AllocaPartitioning::AllocaPartitioning(const TargetData &TD, AllocaInst &AI)
   Uses.resize(Partitions.size());
   UseBuilder UB(TD, AI, *this);
   UB();
-  for (iterator I = Partitions.begin(), E = Partitions.end(); I != E; ++I)
-    std::stable_sort(use_begin(I), use_end(I));
 }

 Type *AllocaPartitioning::getCommonType(iterator I) const {
@@ -1890,7 +1896,8 @@ private:
   Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
                         uint64_t Offset) {
     assert(IntPromotionTy && "Alloca is not an integer we can extract from");
-    Value *V = IRB.CreateLoad(&NewAI, getName(".load"));
+    Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                     getName(".load"));
     assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
     uint64_t RelOffset = Offset - NewAllocaBeginOffset;
     if (RelOffset)
@@ -1906,7 +1913,7 @@ private:
   StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
     IntegerType *Ty = cast<IntegerType>(V->getType());
     if (Ty == IntPromotionTy)
-      return IRB.CreateStore(V, &NewAI);
+      return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());

     assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
            "Cannot insert a larger integer!");
@@ -1918,10 +1925,12 @@ private:
     APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth())
                    .shl(RelOffset*8);
-    Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")),
+    Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI,
+                                                     NewAI.getAlignment(),
+                                                     getName(".oldload")),
                                Mask, getName(".mask"));
-    return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")),
-                           &NewAI);
+    return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")),
+                                  &NewAI, NewAI.getAlignment());
   }

   void deleteIfTriviallyDead(Value *V) {
@@ -1943,12 +1952,12 @@ private:
     Value *Result;
     if (LI.getType() == VecTy->getElementType() ||
         BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
-      Result
-        = IRB.CreateExtractElement(IRB.CreateLoad(&NewAI, getName(".load")),
-                                   getIndex(IRB, BeginOffset),
-                                   getName(".extract"));
+      Result = IRB.CreateExtractElement(
+        IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
+        getIndex(IRB, BeginOffset), getName(".extract"));
     } else {
-      Result = IRB.CreateLoad(&NewAI, getName(".load"));
+      Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                     getName(".load"));
     }
     if (Result->getType() != LI.getType())
       Result = getValueCast(IRB, Result, LI.getType());
@@ -1983,6 +1992,9 @@ private:
     Value *NewPtr = getAdjustedAllocaPtr(IRB,
                                          LI.getPointerOperand()->getType());
     LI.setOperand(0, NewPtr);
+    if (LI.getAlignment())
+      LI.setAlignment(MinAlign(NewAI.getAlignment(),
+                               BeginOffset - NewAllocaBeginOffset));
     DEBUG(dbgs() << " to: " << LI << "\n");

     deleteIfTriviallyDead(OldOp);
@@ -1996,13 +2008,14 @@ private:
         BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
       if (V->getType() != ElementTy)
         V = getValueCast(IRB, V, ElementTy);
-      V = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V,
-                                  getIndex(IRB, BeginOffset),
+      LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                           getName(".load"));
+      V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
                                   getName(".insert"));
     } else if (V->getType() != VecTy) {
       V = getValueCast(IRB, V, VecTy);
     }
-    StoreInst *Store = IRB.CreateStore(V, &NewAI);
+    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
     Pass.DeadInsts.push_back(&SI);

     (void)Store;
@@ -2033,6 +2046,9 @@ private:
     Value *NewPtr = getAdjustedAllocaPtr(IRB,
                                          SI.getPointerOperand()->getType());
     SI.setOperand(1, NewPtr);
+    if (SI.getAlignment())
+      SI.setAlignment(MinAlign(NewAI.getAlignment(),
+                               BeginOffset - NewAllocaBeginOffset));
     DEBUG(dbgs() << " to: " << SI << "\n");

     deleteIfTriviallyDead(OldOp);
@@ -2048,6 +2064,15 @@ private:
     // pointer to the new alloca.
     if (!isa<Constant>(II.getLength())) {
       II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
+
+      Type *CstTy = II.getAlignmentCst()->getType();
+      if (!NewAI.getAlignment())
+        II.setAlignment(ConstantInt::get(CstTy, 0));
+      else
+        II.setAlignment(
+          ConstantInt::get(CstTy, MinAlign(NewAI.getAlignment(),
+                                           BeginOffset - NewAllocaBeginOffset)));
+
       deleteIfTriviallyDead(OldPtr);
       return false;
     }
@@ -2067,11 +2092,15 @@ private:
          !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
       Type *SizeTy = II.getLength()->getType();
       Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
+      unsigned Align = 1;
+      if (NewAI.getAlignment())
+        Align = MinAlign(NewAI.getAlignment(),
+                         BeginOffset - NewAllocaBeginOffset);

       CallInst *New
         = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB,
                                                 II.getRawDest()->getType()),
-                           II.getValue(), Size, II.getAlignment(),
+                           II.getValue(), Size, Align,
                            II.isVolatile());
       (void)New;
       DEBUG(dbgs() << " to: " << *New << "\n");
@@ -2109,11 +2138,13 @@ private:
     // If this is an element-wide memset of a vectorizable alloca, insert it.
     if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
                   EndOffset < NewAllocaEndOffset)) {
-      StoreInst *Store = IRB.CreateStore(
-        IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V,
-                                getIndex(IRB, BeginOffset),
+      StoreInst *Store = IRB.CreateAlignedStore(
+        IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
+                                                      NewAI.getAlignment(),
+                                                      getName(".load")),
+                                V, getIndex(IRB, BeginOffset),
                                 getName(".insert")),
-        &NewAI);
+        &NewAI, NewAI.getAlignment());
       (void)Store;
       DEBUG(dbgs() << " to: " << *Store << "\n");
       return true;
@@ -2131,7 +2162,8 @@ private:
       assert(V->getType() == VecTy);
     }

-    Value *New = IRB.CreateStore(V, &NewAI, II.isVolatile());
+    Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+                                        II.isVolatile());
     (void)New;
     DEBUG(dbgs() << " to: " << *New << "\n");
     return !II.isVolatile();
@@ -2164,6 +2196,13 @@ private:
     else
       II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType()));

+    Type *CstTy = II.getAlignmentCst()->getType();
+    if (II.getAlignment() > 1)
+      II.setAlignment(ConstantInt::get(
+        CstTy, MinAlign(II.getAlignment(),
+                        MinAlign(NewAI.getAlignment(),
+                                 BeginOffset - NewAllocaBeginOffset))));
+
     DEBUG(dbgs() << " to: " << II << "\n");
     deleteIfTriviallyDead(OldOp);
     return false;
@@ -2221,6 +2260,11 @@ private:
     OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
                               getName("." + OtherPtr->getName()));
+    unsigned Align = II.getAlignment();
+    if (Align > 1)
+      Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
+                       MinAlign(II.getAlignment(), NewAI.getAlignment()));
+
     // Strip all inbounds GEPs and pointer casts to try to dig out any root
     // alloca that should be re-examined after rewriting this instruction.
     if (AllocaInst *AI
@@ -2236,8 +2280,7 @@ private:

       CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr,
                                        IsDest ? OtherPtr : OurPtr,
-                                       Size, II.getAlignment(),
-                                       II.isVolatile());
+                                       Size, Align, II.isVolatile());
       (void)New;
       DEBUG(dbgs() << " to: " << *New << "\n");
       return false;
@@ -2251,22 +2294,25 @@ private:
     Value *Src;
     if (IsVectorElement && !IsDest) {
       // We have to extract rather than load.
-      Src = IRB.CreateExtractElement(IRB.CreateLoad(SrcPtr,
-                                                    getName(".copyload")),
-                                     getIndex(IRB, BeginOffset),
-                                     getName(".copyextract"));
+      Src = IRB.CreateExtractElement(
+        IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
+        getIndex(IRB, BeginOffset),
+        getName(".copyextract"));
     } else {
-      Src = IRB.CreateLoad(SrcPtr, II.isVolatile(), getName(".copyload"));
+      Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
+                                  getName(".copyload"));
     }

     if (IsVectorElement && IsDest) {
       // We have to insert into a loaded copy before storing.
-      Src = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")),
-                                    Src, getIndex(IRB, BeginOffset),
-                                    getName(".insert"));
+      Src = IRB.CreateInsertElement(
+        IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
+        Src, getIndex(IRB, BeginOffset),
+        getName(".insert"));
     }

-    Value *Store = IRB.CreateStore(Src, DstPtr, II.isVolatile());
+    StoreInst *Store = cast<StoreInst>(
+      IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
     (void)Store;
     DEBUG(dbgs() << " to: " << *Store << "\n");
     return !II.isVolatile();
@@ -2460,8 +2506,7 @@ private:
       else {
         AllocaPartitioning::PartitionUse OtherUse = *UI;
         OtherUse.User = Load;
-        P.use_insert(PI, std::upper_bound(UI, P.use_end(PI), OtherUse),
-                     OtherUse);
+        P.use_push_back(PI, OtherUse);
       }
     }
   }
@@ -2559,7 +2604,7 @@ private:
       LoadInst *OtherLoad = IsTrueVal ? FL : TL;
       assert(OtherUse.Ptr == OtherLoad->getOperand(0));
       OtherUse.User = OtherLoad;
-      P.use_insert(PI, P.use_end(PI), OtherUse);
+      P.use_push_back(PI, OtherUse);
     }

     // Transfer alignment and TBAA info if present.
@@ -2576,8 +2621,6 @@ private:
       LI->replaceAllUsesWith(V);
       Pass.DeadInsts.push_back(LI);
     }
-    if (PI != P.end())
-      std::stable_sort(P.use_begin(PI), P.use_end(PI));

     deleteIfTriviallyDead(OldPtr);
     return NewPtr == &NewAI;
@@ -2959,9 +3002,19 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
     assert(PI == P.begin() && "Begin offset is zero on later partition");
     NewAI = &AI;
   } else {
-    // FIXME: The alignment here is overly conservative -- we could in many
-    // cases get away with much weaker alignment constraints.
-    NewAI = new AllocaInst(AllocaTy, 0, AI.getAlignment(),
+    unsigned Alignment = AI.getAlignment();
+    if (!Alignment) {
+      // The minimum alignment which users can rely on when the explicit
+      // alignment is omitted or zero is that required by the ABI for this
+      // type.
+      Alignment = TD->getABITypeAlignment(AI.getAllocatedType());
+    }
+    Alignment = MinAlign(Alignment, PI->BeginOffset);
+    // If we will get at least this much alignment from the type alone, leave
+    // the alloca's alignment unconstrained.
+    if (Alignment <= TD->getABITypeAlignment(AllocaTy))
+      Alignment = 0;
+    NewAI = new AllocaInst(AllocaTy, 0, Alignment,
                            AI.getName() + ".sroa." + Twine(PI - P.begin()),
                            &AI);
     ++NumNewAllocas;
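The SROA diff bundles several fixes. Zero-size uses are now skipped or marked dead up front, and a pointer whose transitive users contain no load or store is given Size = 0 so the whole access is treated as dead. Memory transfers that name the alloca as both source and destination are now also caught when the two operands are literally equal, not only when the intrinsic is visited twice through different value chains. Partition use lists are built in order with use_push_back, letting the constructor and the PHI/select rewriting drop their stable_sort passes. The largest change threads alignment through rewriting: a replacement alloca receives the original alignment (or the ABI alignment when none was given) reduced by the partition's begin offset, and every rewritten load, store, memset and memcpy carries MinAlign(NewAI.getAlignment(), BeginOffset - NewAllocaBeginOffset) rather than silently inheriting the old alloca-wide guarantee. The arithmetic is "greatest power of two dividing both values"; a standalone sketch mirroring llvm::MinAlign from Support/MathExtras.h:

    #include <cassert>
    #include <cstdint>

    // Greatest power of two dividing both A and B. With B a byte offset,
    // this is the alignment a slice at that offset can still be assumed
    // to have when the base object is A-aligned.
    static uint64_t minAlign(uint64_t A, uint64_t B) {
      return (A | B) & (~(A | B) + 1); // lowest set bit of A | B
    }

    int main() {
      // A 16-byte-aligned alloca sliced at offset 4: only 4-byte alignment
      // survives, so a rewritten access must carry align 4.
      assert(minAlign(16, 4) == 4);
      // Slicing at a multiple of the alignment keeps the full 16 bytes.
      assert(minAlign(16, 32) == 16);
      return 0;
    }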