aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar
diff options
context:
space:
mode:
authorDerek Schuff <dschuff@chromium.org>2012-10-01 11:20:30 -0700
committerDerek Schuff <dschuff@chromium.org>2012-10-01 11:20:30 -0700
commitb3423dd295c69f78bd731f1ad65ad90ce3efa36f (patch)
tree0e872df2f0333ed1806d9e0a6906b2f5ebd58512 /lib/Transforms/Scalar
parenta27c28b1427dc2082ab2b31efdbb25f9fde31b61 (diff)
parent72f0976c1b91c7ba50dce4d0ad0289dc14d37f81 (diff)
Merge commit '72f0976c1b91c7ba50dce4d0ad0289dc14d37f81'
Conflicts: lib/Target/ARM/ARMISelDAGToDAG.cpp lib/Target/Mips/MipsISelLowering.cpp lib/Target/Mips/MipsSubtarget.cpp
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--lib/Transforms/Scalar/CodeGenPrepare.cpp7
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp5
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp126
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp2
-rw-r--r--lib/Transforms/Scalar/SROA.cpp161
6 files changed, 200 insertions, 103 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 495cdc6321..305d70f27b 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -149,7 +149,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLInfo = &getAnalysis<TargetLibraryInfo>();
DT = getAnalysisIfAvailable<DominatorTree>();
PFI = getAnalysisIfAvailable<ProfileInfo>();
- OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
+ OptSize = F.getFnAttributes().hasOptimizeForSizeAttr();
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
@@ -226,7 +226,8 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
// edge, just collapse it.
BasicBlock *SinglePred = BB->getSinglePredecessor();
- if (!SinglePred || SinglePred == BB) continue;
+ // Don't merge if BB's address is taken.
+ if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
if (Term && !Term->isConditional()) {
@@ -788,7 +789,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
}
// If we eliminated all predecessors of the block, delete the block now.
- if (Changed && pred_begin(BB) == pred_end(BB))
+ if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
BB->eraseFromParent();
return Changed;
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 9b0aadb0b5..3ec6f3dcc3 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -235,6 +235,11 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
// This case never fires - remove it.
CI.getCaseSuccessor()->removePredecessor(BB);
SI->removeCase(CI); // Does not invalidate the iterator.
+
+ // The condition can be modified by removePredecessor's PHI simplification
+ // logic.
+ Cond = SI->getCondition();
+
++NumDeadCases;
Changed = true;
} else if (State == LazyValueInfo::True) {
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 1ff4329c84..301ee2f663 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -30,6 +30,7 @@
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/ADT/SetVector.h"
@@ -45,6 +46,7 @@ namespace {
AliasAnalysis *AA;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
+ const TargetLibraryInfo *TLI;
static char ID; // Pass identification, replacement for typeid
DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) {
@@ -55,6 +57,7 @@ namespace {
AA = &getAnalysis<AliasAnalysis>();
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ TLI = AA->getTargetLibraryInfo();
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
@@ -144,7 +147,7 @@ static void DeleteDeadInstruction(Instruction *I,
/// hasMemoryWrite - Does this instruction write some memory? This only returns
/// true for things that we can analyze with other helpers below.
-static bool hasMemoryWrite(Instruction *I) {
+static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
if (isa<StoreInst>(I))
return true;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -159,6 +162,26 @@ static bool hasMemoryWrite(Instruction *I) {
return true;
}
}
+ if (CallSite CS = I) {
+ if (Function *F = CS.getCalledFunction()) {
+ if (TLI && TLI->has(LibFunc::strcpy) &&
+ F->getName() == TLI->getName(LibFunc::strcpy)) {
+ return true;
+ }
+ if (TLI && TLI->has(LibFunc::strncpy) &&
+ F->getName() == TLI->getName(LibFunc::strncpy)) {
+ return true;
+ }
+ if (TLI && TLI->has(LibFunc::strcat) &&
+ F->getName() == TLI->getName(LibFunc::strcat)) {
+ return true;
+ }
+ if (TLI && TLI->has(LibFunc::strncat) &&
+ F->getName() == TLI->getName(LibFunc::strncat)) {
+ return true;
+ }
+ }
+ }
return false;
}
@@ -206,7 +229,8 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
/// instruction if any.
static AliasAnalysis::Location
getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
- assert(hasMemoryWrite(Inst) && "Unknown instruction case");
+ assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) &&
+ "Unknown instruction case");
// The only instructions that both read and write are the mem transfer
// instructions (memcpy/memmove).
@@ -223,23 +247,29 @@ static bool isRemovable(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->isUnordered();
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
- case Intrinsic::lifetime_end:
- // Never remove dead lifetime_end's, e.g. because it is followed by a
- // free.
- return false;
- case Intrinsic::init_trampoline:
- // Always safe to remove init_trampoline.
- return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
+ case Intrinsic::lifetime_end:
+ // Never remove dead lifetime_end's, e.g. because it is followed by a
+ // free.
+ return false;
+ case Intrinsic::init_trampoline:
+ // Always safe to remove init_trampoline.
+ return true;
- case Intrinsic::memset:
- case Intrinsic::memmove:
- case Intrinsic::memcpy:
- // Don't remove volatile memory intrinsics.
- return !cast<MemIntrinsic>(II)->isVolatile();
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ // Don't remove volatile memory intrinsics.
+ return !cast<MemIntrinsic>(II)->isVolatile();
+ }
}
+
+ if (CallSite CS = I)
+ return CS.getInstruction()->use_empty();
+
+ return false;
}
@@ -250,14 +280,19 @@ static bool isShortenable(Instruction *I) {
if (isa<StoreInst>(I))
return false;
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: return false;
- case Intrinsic::memset:
- case Intrinsic::memcpy:
- // Do shorten memory intrinsics.
- return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ // Do shorten memory intrinsics.
+ return true;
+ }
}
+
+ // Don't shorten libcalls calls for now.
+
+ return false;
}
/// getStoredPointerOperand - Return the pointer that is being written to.
@@ -267,12 +302,18 @@ static Value *getStoredPointerOperand(Instruction *I) {
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
return MI->getDest();
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::init_trampoline:
- return II->getArgOperand(0);
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return II->getArgOperand(0);
+ }
}
+
+ CallSite CS = I;
+ // All the supported functions so far happen to have dest as their first
+ // argument.
+ return CS.getArgument(0);
}
static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
@@ -455,13 +496,13 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *Inst = BBI++;
// Handle 'free' calls specially.
- if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
+ if (CallInst *F = isFreeCall(Inst, TLI)) {
MadeChange |= HandleFree(F);
continue;
}
// If we find something that writes memory, get its memory dependence.
- if (!hasMemoryWrite(Inst))
+ if (!hasMemoryWrite(Inst, TLI))
continue;
MemDepResult InstDep = MD->getDependency(Inst);
@@ -484,7 +525,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
// in case we need it.
WeakVH NextInst(BBI);
- DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo());
+ DeleteDeadInstruction(SI, *MD, TLI);
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
@@ -531,7 +572,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo());
+ DeleteDeadInstruction(DepWrite, *MD, TLI);
++NumFastStores;
MadeChange = true;
@@ -628,7 +669,7 @@ bool DSE::HandleFree(CallInst *F) {
MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB);
while (Dep.isDef() || Dep.isClobber()) {
Instruction *Dependency = Dep.getInst();
- if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
+ if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency))
break;
Value *DepPointer =
@@ -641,7 +682,7 @@ bool DSE::HandleFree(CallInst *F) {
Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));
// DCE instructions only used to calculate that store
- DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo());
+ DeleteDeadInstruction(Dependency, *MD, TLI);
++NumFastStores;
MadeChange = true;
@@ -681,8 +722,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Okay, so these are dead heap objects, but if the pointer never escapes
// then it's leaked by this function anyways.
- else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) &&
- !PointerMayBeCaptured(I, true, true))
+ else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true))
DeadStackObjects.insert(I);
}
@@ -698,7 +738,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
--BBI;
// If we find a store, check to see if it points into a dead stack value.
- if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
+ if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
// See through pointer-to-pointer bitcasts
SmallVector<Value *, 4> Pointers;
GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);
@@ -726,8 +766,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
dbgs() << '\n');
// DCE instructions only used to calculate that store.
- DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(),
- &DeadStackObjects);
+ DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -735,10 +774,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
// Remove any dead non-memory-mutating instructions.
- if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) {
+ if (isInstructionTriviallyDead(BBI, TLI)) {
Instruction *Inst = BBI++;
- DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(),
- &DeadStackObjects);
+ DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -754,7 +792,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (CallSite CS = cast<Value>(BBI)) {
// Remove allocation function calls from the list of dead stack objects;
// there can't be any references before the definition.
- if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo()))
+ if (isAllocLikeFn(BBI, TLI))
DeadStackObjects.remove(BBI);
// If this call does not access memory, it can't be loading any of our
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 09a186f7f9..f8709a537f 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -145,7 +145,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
// not user specified.
unsigned Threshold = CurrentThreshold;
if (!UserThreshold &&
- Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
+ Header->getParent()->getFnAttributes().hasOptimizeForSizeAttr())
Threshold = OptSizeUnrollThreshold;
// Find trip count and trip multiple if count is not available
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 58f7739888..74c8f43ec2 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -638,7 +638,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// Check to see if it would be profitable to unswitch current loop.
// Do not do non-trivial unswitch while optimizing for size.
- if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize))
+ if (OptimizeForSize || F->getFnAttributes().hasOptimizeForSizeAttr())
return false;
UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index e3182d319c..a8dc0533bf 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -202,11 +202,11 @@ public:
use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); }
use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); }
use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); }
- void use_insert(unsigned Idx, use_iterator UI, const PartitionUse &U) {
- Uses[Idx].insert(UI, U);
+ void use_push_back(unsigned Idx, const PartitionUse &U) {
+ Uses[Idx].push_back(U);
}
- void use_insert(const_iterator I, use_iterator UI, const PartitionUse &U) {
- Uses[I - begin()].insert(UI, U);
+ void use_push_back(const_iterator I, const PartitionUse &U) {
+ Uses[I - begin()].push_back(U);
}
void use_erase(unsigned Idx, use_iterator UI) { Uses[Idx].erase(UI); }
void use_erase(const_iterator I, use_iterator UI) {
@@ -522,8 +522,10 @@ private:
void insertUse(Instruction &I, int64_t Offset, uint64_t Size,
bool IsSplittable = false) {
- // Completely skip uses which don't overlap the allocation.
- if ((Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
+ // Completely skip uses which have a zero size or don't overlap the
+ // allocation.
+ if (Size == 0 ||
+ (Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
(Offset < 0 && (uint64_t)-Offset >= Size)) {
DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
<< " which starts past the end of the " << AllocSize
@@ -660,11 +662,14 @@ private:
bool Inserted = false;
llvm::tie(PMI, Inserted)
= MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx));
- if (!Inserted && Offsets.IsSplittable) {
+ if (Offsets.IsSplittable &&
+ (!Inserted || II.getRawSource() == II.getRawDest())) {
// We've found a memory transfer intrinsic which refers to the alloca as
- // both a source and dest. We refuse to split these to simplify splitting
- // logic. If possible, SROA will still split them into separate allocas
- // and then re-analyze.
+ // both a source and dest. This is detected either by direct equality of
+ // the operand values, or when we visit the intrinsic twice due to two
+ // different chains of values leading to it. We refuse to split these to
+ // simplify splitting logic. If possible, SROA will still split them into
+ // separate allocas and then re-analyze.
Offsets.IsSplittable = false;
P.Partitions[PMI->second].IsSplittable = false;
P.Partitions[NewIdx].IsSplittable = false;
@@ -697,6 +702,9 @@ private:
SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
Visited.insert(Root);
Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
+ // If there are no loads or stores, the access is dead. We mark that as
+ // a size zero access.
+ Size = 0;
do {
Instruction *I, *UsedI;
llvm::tie(UsedI, I) = Uses.pop_back_val();
@@ -824,9 +832,9 @@ private:
}
void insertUse(Instruction &User, int64_t Offset, uint64_t Size) {
- // If the use extends outside of the allocation, record it as a dead use
- // for elimination later.
- if ((uint64_t)Offset >= AllocSize ||
+ // If the use has a zero size or extends outside of the allocation, record
+ // it as a dead use for elimination later.
+ if (Size == 0 || (uint64_t)Offset >= AllocSize ||
(Offset < 0 && (uint64_t)-Offset >= Size))
return markAsDead(User);
@@ -853,7 +861,7 @@ private:
PartitionUse NewUse(std::max(I->BeginOffset, BeginOffset),
std::min(I->EndOffset, EndOffset),
&User, cast<Instruction>(*U));
- P.Uses[I - P.begin()].push_back(NewUse);
+ P.use_push_back(I, NewUse);
if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
P.PHIOrSelectOpMap[std::make_pair(&User, U->get())]
= std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1);
@@ -1102,8 +1110,6 @@ AllocaPartitioning::AllocaPartitioning(const TargetData &TD, AllocaInst &AI)
Uses.resize(Partitions.size());
UseBuilder UB(TD, AI, *this);
UB();
- for (iterator I = Partitions.begin(), E = Partitions.end(); I != E; ++I)
- std::stable_sort(use_begin(I), use_end(I));
}
Type *AllocaPartitioning::getCommonType(iterator I) const {
@@ -1890,7 +1896,8 @@ private:
Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
uint64_t Offset) {
assert(IntPromotionTy && "Alloca is not an integer we can extract from");
- Value *V = IRB.CreateLoad(&NewAI, getName(".load"));
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
uint64_t RelOffset = Offset - NewAllocaBeginOffset;
if (RelOffset)
@@ -1906,7 +1913,7 @@ private:
StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
IntegerType *Ty = cast<IntegerType>(V->getType());
if (Ty == IntPromotionTy)
- return IRB.CreateStore(V, &NewAI);
+ return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
"Cannot insert a larger integer!");
@@ -1918,10 +1925,12 @@ private:
APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth())
.shl(RelOffset*8);
- Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")),
+ Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI,
+ NewAI.getAlignment(),
+ getName(".oldload")),
Mask, getName(".mask"));
- return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")),
- &NewAI);
+ return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")),
+ &NewAI, NewAI.getAlignment());
}
void deleteIfTriviallyDead(Value *V) {
@@ -1943,12 +1952,12 @@ private:
Value *Result;
if (LI.getType() == VecTy->getElementType() ||
BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
- Result
- = IRB.CreateExtractElement(IRB.CreateLoad(&NewAI, getName(".load")),
- getIndex(IRB, BeginOffset),
- getName(".extract"));
+ Result = IRB.CreateExtractElement(
+ IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
+ getIndex(IRB, BeginOffset), getName(".extract"));
} else {
- Result = IRB.CreateLoad(&NewAI, getName(".load"));
+ Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
}
if (Result->getType() != LI.getType())
Result = getValueCast(IRB, Result, LI.getType());
@@ -1983,6 +1992,9 @@ private:
Value *NewPtr = getAdjustedAllocaPtr(IRB,
LI.getPointerOperand()->getType());
LI.setOperand(0, NewPtr);
+ if (LI.getAlignment())
+ LI.setAlignment(MinAlign(NewAI.getAlignment(),
+ BeginOffset - NewAllocaBeginOffset));
DEBUG(dbgs() << " to: " << LI << "\n");
deleteIfTriviallyDead(OldOp);
@@ -1996,13 +2008,14 @@ private:
BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
if (V->getType() != ElementTy)
V = getValueCast(IRB, V, ElementTy);
- V = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V,
- getIndex(IRB, BeginOffset),
+ LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
getName(".insert"));
} else if (V->getType() != VecTy) {
V = getValueCast(IRB, V, VecTy);
}
- StoreInst *Store = IRB.CreateStore(V, &NewAI);
+ StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.push_back(&SI);
(void)Store;
@@ -2033,6 +2046,9 @@ private:
Value *NewPtr = getAdjustedAllocaPtr(IRB,
SI.getPointerOperand()->getType());
SI.setOperand(1, NewPtr);
+ if (SI.getAlignment())
+ SI.setAlignment(MinAlign(NewAI.getAlignment(),
+ BeginOffset - NewAllocaBeginOffset));
DEBUG(dbgs() << " to: " << SI << "\n");
deleteIfTriviallyDead(OldOp);
@@ -2048,6 +2064,15 @@ private:
// pointer to the new alloca.
if (!isa<Constant>(II.getLength())) {
II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
+
+ Type *CstTy = II.getAlignmentCst()->getType();
+ if (!NewAI.getAlignment())
+ II.setAlignment(ConstantInt::get(CstTy, 0));
+ else
+ II.setAlignment(
+ ConstantInt::get(CstTy, MinAlign(NewAI.getAlignment(),
+ BeginOffset - NewAllocaBeginOffset)));
+
deleteIfTriviallyDead(OldPtr);
return false;
}
@@ -2067,11 +2092,15 @@ private:
!TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
+ unsigned Align = 1;
+ if (NewAI.getAlignment())
+ Align = MinAlign(NewAI.getAlignment(),
+ BeginOffset - NewAllocaBeginOffset);
CallInst *New
= IRB.CreateMemSet(getAdjustedAllocaPtr(IRB,
II.getRawDest()->getType()),
- II.getValue(), Size, II.getAlignment(),
+ II.getValue(), Size, Align,
II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
@@ -2109,11 +2138,13 @@ private:
// If this is an element-wide memset of a vectorizable alloca, insert it.
if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)) {
- StoreInst *Store = IRB.CreateStore(
- IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V,
- getIndex(IRB, BeginOffset),
+ StoreInst *Store = IRB.CreateAlignedStore(
+ IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
+ NewAI.getAlignment(),
+ getName(".load")),
+ V, getIndex(IRB, BeginOffset),
getName(".insert")),
- &NewAI);
+ &NewAI, NewAI.getAlignment());
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
@@ -2131,7 +2162,8 @@ private:
assert(V->getType() == VecTy);
}
- Value *New = IRB.CreateStore(V, &NewAI, II.isVolatile());
+ Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+ II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return !II.isVolatile();
@@ -2164,6 +2196,13 @@ private:
else
II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType()));
+ Type *CstTy = II.getAlignmentCst()->getType();
+ if (II.getAlignment() > 1)
+ II.setAlignment(ConstantInt::get(
+ CstTy, MinAlign(II.getAlignment(),
+ MinAlign(NewAI.getAlignment(),
+ BeginOffset - NewAllocaBeginOffset))));
+
DEBUG(dbgs() << " to: " << II << "\n");
deleteIfTriviallyDead(OldOp);
return false;
@@ -2221,6 +2260,11 @@ private:
OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
getName("." + OtherPtr->getName()));
+ unsigned Align = II.getAlignment();
+ if (Align > 1)
+ Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
+ MinAlign(II.getAlignment(), NewAI.getAlignment()));
+
// Strip all inbounds GEPs and pointer casts to try to dig out any root
// alloca that should be re-examined after rewriting this instruction.
if (AllocaInst *AI
@@ -2236,8 +2280,7 @@ private:
CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr,
IsDest ? OtherPtr : OurPtr,
- Size, II.getAlignment(),
- II.isVolatile());
+ Size, Align, II.isVolatile());
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
return false;
@@ -2251,22 +2294,25 @@ private:
Value *Src;
if (IsVectorElement && !IsDest) {
// We have to extract rather than load.
- Src = IRB.CreateExtractElement(IRB.CreateLoad(SrcPtr,
- getName(".copyload")),
- getIndex(IRB, BeginOffset),
- getName(".copyextract"));
+ Src = IRB.CreateExtractElement(
+ IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
+ getIndex(IRB, BeginOffset),
+ getName(".copyextract"));
} else {
- Src = IRB.CreateLoad(SrcPtr, II.isVolatile(), getName(".copyload"));
+ Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
+ getName(".copyload"));
}
if (IsVectorElement && IsDest) {
// We have to insert into a loaded copy before storing.
- Src = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")),
- Src, getIndex(IRB, BeginOffset),
- getName(".insert"));
+ Src = IRB.CreateInsertElement(
+ IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
+ Src, getIndex(IRB, BeginOffset),
+ getName(".insert"));
}
- Value *Store = IRB.CreateStore(Src, DstPtr, II.isVolatile());
+ StoreInst *Store = cast<StoreInst>(
+ IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
@@ -2460,8 +2506,7 @@ private:
else {
AllocaPartitioning::PartitionUse OtherUse = *UI;
OtherUse.User = Load;
- P.use_insert(PI, std::upper_bound(UI, P.use_end(PI), OtherUse),
- OtherUse);
+ P.use_push_back(PI, OtherUse);
}
}
}
@@ -2559,7 +2604,7 @@ private:
LoadInst *OtherLoad = IsTrueVal ? FL : TL;
assert(OtherUse.Ptr == OtherLoad->getOperand(0));
OtherUse.User = OtherLoad;
- P.use_insert(PI, P.use_end(PI), OtherUse);
+ P.use_push_back(PI, OtherUse);
}
// Transfer alignment and TBAA info if present.
@@ -2576,8 +2621,6 @@ private:
LI->replaceAllUsesWith(V);
Pass.DeadInsts.push_back(LI);
}
- if (PI != P.end())
- std::stable_sort(P.use_begin(PI), P.use_end(PI));
deleteIfTriviallyDead(OldPtr);
return NewPtr == &NewAI;
@@ -2959,9 +3002,19 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
assert(PI == P.begin() && "Begin offset is zero on later partition");
NewAI = &AI;
} else {
- // FIXME: The alignment here is overly conservative -- we could in many
- // cases get away with much weaker alignment constraints.
- NewAI = new AllocaInst(AllocaTy, 0, AI.getAlignment(),
+ unsigned Alignment = AI.getAlignment();
+ if (!Alignment) {
+ // The minimum alignment which users can rely on when the explicit
+ // alignment is omitted or zero is that required by the ABI for this
+ // type.
+ Alignment = TD->getABITypeAlignment(AI.getAllocatedType());
+ }
+ Alignment = MinAlign(Alignment, PI->BeginOffset);
+ // If we will get at least this much alignment from the type alone, leave
+ // the alloca's alignment unconstrained.
+ if (Alignment <= TD->getABITypeAlignment(AllocaTy))
+ Alignment = 0;
+ NewAI = new AllocaInst(AllocaTy, 0, Alignment,
AI.getName() + ".sroa." + Twine(PI - P.begin()),
&AI);
++NumNewAllocas;