author     Derek Schuff <dschuff@chromium.org>    2012-10-01 11:20:30 -0700
committer  Derek Schuff <dschuff@chromium.org>    2012-10-01 11:20:30 -0700
commit     b3423dd295c69f78bd731f1ad65ad90ce3efa36f (patch)
tree       0e872df2f0333ed1806d9e0a6906b2f5ebd58512 /lib/Transforms/Scalar
parent     a27c28b1427dc2082ab2b31efdbb25f9fde31b61 (diff)
parent     72f0976c1b91c7ba50dce4d0ad0289dc14d37f81 (diff)
Merge commit '72f0976c1b91c7ba50dce4d0ad0289dc14d37f81'
Conflicts:
	lib/Target/ARM/ARMISelDAGToDAG.cpp
	lib/Target/Mips/MipsISelLowering.cpp
	lib/Target/Mips/MipsSubtarget.cpp
Diffstat (limited to 'lib/Transforms/Scalar')
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp               7
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp   5
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp       126
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp               2
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp                 2
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp                       161
6 files changed, 200 insertions, 103 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 495cdc6321..305d70f27b 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -149,7 +149,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   TLInfo = &getAnalysis<TargetLibraryInfo>();
   DT = getAnalysisIfAvailable<DominatorTree>();
   PFI = getAnalysisIfAvailable<ProfileInfo>();
-  OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
+  OptSize = F.getFnAttributes().hasOptimizeForSizeAttr();

   /// This optimization identifies DIV instructions that can be
   /// profitably bypassed and carried out with a shorter, faster divide.
@@ -226,7 +226,8 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) {
     // edge, just collapse it.
     BasicBlock *SinglePred = BB->getSinglePredecessor();

-    if (!SinglePred || SinglePred == BB) continue;
+    // Don't merge if BB's address is taken.
+    if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;

     BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
     if (Term && !Term->isConditional()) {
@@ -788,7 +789,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
   }

   // If we eliminated all predecessors of the block, delete the block now.
-  if (Changed && pred_begin(BB) == pred_end(BB))
+  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
     BB->eraseFromParent();

   return Changed;
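Both CodeGenPrepare hunks above gate block folding on BasicBlock::hasAddressTaken(): a block whose address escapes through a blockaddress constant may be the target of an indirect branch, so it must not be merged into its predecessor or erased even when the visible CFG says that would be safe. A minimal sketch of the resulting test, written against the LLVM 3.2-era API the patch uses (the helper name and exact headers are assumptions, not part of the patch):

    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"

    // Sketch: may BB be folded into its lone unconditional predecessor?
    static bool mayFoldIntoPredecessor(llvm::BasicBlock *BB) {
      llvm::BasicBlock *SinglePred = BB->getSinglePredecessor();
      // A blockaddress() user can reach BB through indirectbr, so the edge
      // set is not fully visible in the CFG; never fold such a block.
      if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
        return false;
      llvm::BranchInst *Term =
          llvm::dyn_cast<llvm::BranchInst>(SinglePred->getTerminator());
      return Term && !Term->isConditional();
    }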
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 9b0aadb0b5..3ec6f3dcc3 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -235,6 +235,11 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) {
         // This case never fires - remove it.
         CI.getCaseSuccessor()->removePredecessor(BB);
         SI->removeCase(CI); // Does not invalidate the iterator.
+
+        // The condition can be modified by removePredecessor's PHI simplification
+        // logic.
+        Cond = SI->getCondition();
+
         ++NumDeadCases;
         Changed = true;
       } else if (State == LazyValueInfo::True) {
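The CorrelatedValuePropagation hunk is a stale-pointer fix: SwitchInst::removeCase leaves the case iterator usable, but removePredecessor can simplify PHI nodes in the dead successor, and that simplification may replace the very value the switch condition was derived from, leaving any cached Cond pointer stale. A condensed sketch of the loop shape (LLVM 3.2-era API; the dead-case test is stubbed out because the real pass queries LazyValueInfo):

    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"

    static bool pruneDeadCases(llvm::SwitchInst *SI,
                               bool (*caseIsDead)(llvm::ConstantInt *)) {
      llvm::BasicBlock *BB = SI->getParent();
      llvm::Value *Cond = SI->getCondition();
      bool Changed = false;
      for (llvm::SwitchInst::CaseIt CI = SI->case_begin();
           CI != SI->case_end(); /* advanced in the else branch */) {
        if (caseIsDead(CI.getCaseValue())) {
          CI.getCaseSuccessor()->removePredecessor(BB);
          SI->removeCase(CI);        // does not invalidate CI...
          Cond = SI->getCondition(); // ...but may have replaced the condition
          Changed = true;
        } else {
          ++CI;
        }
      }
      (void)Cond; // the real pass keeps using Cond for later LVI queries
      return Changed;
    }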
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 1ff4329c84..301ee2f663 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLibraryInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/ADT/SetVector.h"
@@ -45,6 +46,7 @@ namespace {
     AliasAnalysis *AA;
     MemoryDependenceAnalysis *MD;
     DominatorTree *DT;
+    const TargetLibraryInfo *TLI;

     static char ID; // Pass identification, replacement for typeid
     DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) {
@@ -55,6 +57,7 @@ namespace {
       AA = &getAnalysis<AliasAnalysis>();
       MD = &getAnalysis<MemoryDependenceAnalysis>();
       DT = &getAnalysis<DominatorTree>();
+      TLI = AA->getTargetLibraryInfo();

       bool Changed = false;
       for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
@@ -144,7 +147,7 @@ static void DeleteDeadInstruction(Instruction *I,

 /// hasMemoryWrite - Does this instruction write some memory? This only returns
 /// true for things that we can analyze with other helpers below.
-static bool hasMemoryWrite(Instruction *I) {
+static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) {
   if (isa<StoreInst>(I))
     return true;
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
@@ -159,6 +162,26 @@ static bool hasMemoryWrite(Instruction *I) {
       return true;
     }
   }
+  if (CallSite CS = I) {
+    if (Function *F = CS.getCalledFunction()) {
+      if (TLI && TLI->has(LibFunc::strcpy) &&
+          F->getName() == TLI->getName(LibFunc::strcpy)) {
+        return true;
+      }
+      if (TLI && TLI->has(LibFunc::strncpy) &&
+          F->getName() == TLI->getName(LibFunc::strncpy)) {
+        return true;
+      }
+      if (TLI && TLI->has(LibFunc::strcat) &&
+          F->getName() == TLI->getName(LibFunc::strcat)) {
+        return true;
+      }
+      if (TLI && TLI->has(LibFunc::strncat) &&
+          F->getName() == TLI->getName(LibFunc::strncat)) {
+        return true;
+      }
+    }
+  }
   return false;
 }

@@ -206,7 +229,8 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
 /// instruction if any.
 static AliasAnalysis::Location
 getLocForRead(Instruction *Inst, AliasAnalysis &AA) {
-  assert(hasMemoryWrite(Inst) && "Unknown instruction case");
+  assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) &&
+         "Unknown instruction case");

   // The only instructions that both read and write are the mem transfer
   // instructions (memcpy/memmove).
@@ -223,23 +247,29 @@ static bool isRemovable(Instruction *I) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->isUnordered();

-  IntrinsicInst *II = cast<IntrinsicInst>(I);
-  switch (II->getIntrinsicID()) {
-  default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
-  case Intrinsic::lifetime_end:
-    // Never remove dead lifetime_end's, e.g. because it is followed by a
-    // free.
-    return false;
-  case Intrinsic::init_trampoline:
-    // Always safe to remove init_trampoline.
-    return true;
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default: llvm_unreachable("doesn't pass 'hasMemoryWrite' predicate");
+    case Intrinsic::lifetime_end:
+      // Never remove dead lifetime_end's, e.g. because it is followed by a
+      // free.
+      return false;
+    case Intrinsic::init_trampoline:
+      // Always safe to remove init_trampoline.
+      return true;

-  case Intrinsic::memset:
-  case Intrinsic::memmove:
-  case Intrinsic::memcpy:
-    // Don't remove volatile memory intrinsics.
-    return !cast<MemIntrinsic>(II)->isVolatile();
+    case Intrinsic::memset:
+    case Intrinsic::memmove:
+    case Intrinsic::memcpy:
+      // Don't remove volatile memory intrinsics.
+      return !cast<MemIntrinsic>(II)->isVolatile();
+    }
   }
+
+  if (CallSite CS = I)
+    return CS.getInstruction()->use_empty();
+
+  return false;
 }

@@ -250,14 +280,19 @@ static bool isShortenable(Instruction *I) {
   if (isa<StoreInst>(I))
     return false;

-  IntrinsicInst *II = cast<IntrinsicInst>(I);
-  switch (II->getIntrinsicID()) {
-    default: return false;
-    case Intrinsic::memset:
-    case Intrinsic::memcpy:
-      // Do shorten memory intrinsics.
-      return true;
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+      default: return false;
+      case Intrinsic::memset:
+      case Intrinsic::memcpy:
+        // Do shorten memory intrinsics.
+        return true;
+    }
   }
+
+  // Don't shorten libcalls calls for now.
+
+  return false;
 }

 /// getStoredPointerOperand - Return the pointer that is being written to.
@@ -267,12 +302,18 @@ static Value *getStoredPointerOperand(Instruction *I) {
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
     return MI->getDest();

-  IntrinsicInst *II = cast<IntrinsicInst>(I);
-  switch (II->getIntrinsicID()) {
-  default: llvm_unreachable("Unexpected intrinsic!");
-  case Intrinsic::init_trampoline:
-    return II->getArgOperand(0);
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default: llvm_unreachable("Unexpected intrinsic!");
+    case Intrinsic::init_trampoline:
+      return II->getArgOperand(0);
+    }
   }
+
+  CallSite CS = I;
+  // All the supported functions so far happen to have dest as their first
+  // argument.
+  return CS.getArgument(0);
 }

 static uint64_t getPointerSize(const Value *V, AliasAnalysis &AA) {
@@ -455,13 +496,13 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
     Instruction *Inst = BBI++;

     // Handle 'free' calls specially.
-    if (CallInst *F = isFreeCall(Inst, AA->getTargetLibraryInfo())) {
+    if (CallInst *F = isFreeCall(Inst, TLI)) {
       MadeChange |= HandleFree(F);
       continue;
     }

     // If we find something that writes memory, get its memory dependence.
-    if (!hasMemoryWrite(Inst))
+    if (!hasMemoryWrite(Inst, TLI))
       continue;

     MemDepResult InstDep = MD->getDependency(Inst);
@@ -484,7 +525,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
           // in case we need it.
           WeakVH NextInst(BBI);

-          DeleteDeadInstruction(SI, *MD, AA->getTargetLibraryInfo());
+          DeleteDeadInstruction(SI, *MD, TLI);

           if (NextInst == 0)  // Next instruction deleted.
             BBI = BB.begin();
@@ -531,7 +572,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
               << *DepWrite << "\n KILLER: " << *Inst << '\n');

         // Delete the store and now-dead instructions that feed it.
-        DeleteDeadInstruction(DepWrite, *MD, AA->getTargetLibraryInfo());
+        DeleteDeadInstruction(DepWrite, *MD, TLI);
         ++NumFastStores;
         MadeChange = true;

@@ -628,7 +669,7 @@ bool DSE::HandleFree(CallInst *F) {
     MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB);
     while (Dep.isDef() || Dep.isClobber()) {
       Instruction *Dependency = Dep.getInst();
-      if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
+      if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency))
         break;

       Value *DepPointer =
@@ -641,7 +682,7 @@ bool DSE::HandleFree(CallInst *F) {
       Instruction *Next = llvm::next(BasicBlock::iterator(Dependency));

       // DCE instructions only used to calculate that store
-      DeleteDeadInstruction(Dependency, *MD, AA->getTargetLibraryInfo());
+      DeleteDeadInstruction(Dependency, *MD, TLI);
       ++NumFastStores;
       MadeChange = true;

@@ -681,8 +722,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {

     // Okay, so these are dead heap objects, but if the pointer never escapes
     // then it's leaked by this function anyways.
-    else if (isAllocLikeFn(I, AA->getTargetLibraryInfo()) &&
-             !PointerMayBeCaptured(I, true, true))
+    else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true))
       DeadStackObjects.insert(I);
   }

@@ -698,7 +738,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
     --BBI;

     // If we find a store, check to see if it points into a dead stack value.
-    if (hasMemoryWrite(BBI) && isRemovable(BBI)) {
+    if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) {
      // See through pointer-to-pointer bitcasts
       SmallVector<Value *, 4> Pointers;
       GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers);
@@ -726,8 +766,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
               dbgs() << '\n');

         // DCE instructions only used to calculate that store.
-        DeleteDeadInstruction(Dead, *MD, AA->getTargetLibraryInfo(),
-                              &DeadStackObjects);
+        DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects);
         ++NumFastStores;
         MadeChange = true;
         continue;
@@ -735,10 +774,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
     }

     // Remove any dead non-memory-mutating instructions.
-    if (isInstructionTriviallyDead(BBI, AA->getTargetLibraryInfo())) {
+    if (isInstructionTriviallyDead(BBI, TLI)) {
       Instruction *Inst = BBI++;
-      DeleteDeadInstruction(Inst, *MD, AA->getTargetLibraryInfo(),
-                            &DeadStackObjects);
+      DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects);
       ++NumFastOther;
       MadeChange = true;
       continue;
@@ -754,7 +792,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
     if (CallSite CS = cast<Value>(BBI)) {
       // Remove allocation function calls from the list of dead stack objects;
       // there can't be any references before the definition.
-      if (isAllocLikeFn(BBI, AA->getTargetLibraryInfo()))
+      if (isAllocLikeFn(BBI, TLI))
         DeadStackObjects.remove(BBI);

       // If this call does not access memory, it can't be loading any of our
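Net effect of the DeadStoreElimination changes: the pass caches a TargetLibraryInfo pointer and uses it to recognize calls to strcpy, strncpy, strcat and strncat as analyzable memory writes. Such a call is removable only when its result is unused (CS.getInstruction()->use_empty()), is never shortened, and its written location is taken to be its first argument. A string-copy libcall whose destination is completely overwritten before any read can therefore be deleted like an ordinary dead store; a C++ sketch of the pattern this enables (whether the call is actually removed still depends on the usual dependence and aliasing checks):

    #include <cstring>

    void rewriteBuffer(char *Dst) {
      char Buf[16];
      std::strcpy(Buf, "scratch"); // dead: fully overwritten below before
                                   // any read, and the result is unused
      std::strcpy(Buf, "payload");
      std::strcpy(Dst, Buf);
    }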
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 09a186f7f9..f8709a537f 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -145,7 +145,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   // not user specified.
   unsigned Threshold = CurrentThreshold;
   if (!UserThreshold &&
-      Header->getParent()->hasFnAttr(Attribute::OptimizeForSize))
+      Header->getParent()->getFnAttributes().hasOptimizeForSizeAttr())
     Threshold = OptSizeUnrollThreshold;

   // Find trip count and trip multiple if count is not available
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 58f7739888..74c8f43ec2 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -638,7 +638,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
   // Check to see if it would be profitable to unswitch current loop.

   // Do not do non-trivial unswitch while optimizing for size.
-  if (OptimizeForSize || F->hasFnAttr(Attribute::OptimizeForSize))
+  if (OptimizeForSize || F->getFnAttributes().hasOptimizeForSizeAttr())
     return false;

   UnswitchNontrivialCondition(LoopCond, Val, currentLoop);
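The two loop-pass hunks above are mechanical updates for the attribute rework being merged from upstream: the optimize-for-size query moves from a bit test on Function to an accessor on the function's attribute object. A one-line wrapper showing both spellings (the helper name is hypothetical; the new accessor is taken from the patch itself):

    #include "llvm/Attributes.h"
    #include "llvm/Function.h"

    static bool optimizingForSize(const llvm::Function &F) {
      // Old spelling: F.hasFnAttr(llvm::Attribute::OptimizeForSize)
      return F.getFnAttributes().hasOptimizeForSizeAttr();
    }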
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index e3182d319c..a8dc0533bf 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -202,11 +202,11 @@ public:
   use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); }
   use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); }
   use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); }
-  void use_insert(unsigned Idx, use_iterator UI, const PartitionUse &U) {
-    Uses[Idx].insert(UI, U);
+  void use_push_back(unsigned Idx, const PartitionUse &U) {
+    Uses[Idx].push_back(U);
   }
-  void use_insert(const_iterator I, use_iterator UI, const PartitionUse &U) {
-    Uses[I - begin()].insert(UI, U);
+  void use_push_back(const_iterator I, const PartitionUse &U) {
+    Uses[I - begin()].push_back(U);
   }
   void use_erase(unsigned Idx, use_iterator UI) { Uses[Idx].erase(UI); }
   void use_erase(const_iterator I, use_iterator UI) {
@@ -522,8 +522,10 @@ private:
   void insertUse(Instruction &I, int64_t Offset, uint64_t Size,
                  bool IsSplittable = false) {
-    // Completely skip uses which don't overlap the allocation.
-    if ((Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
+    // Completely skip uses which have a zero size or don't overlap the
+    // allocation.
+    if (Size == 0 ||
+        (Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
         (Offset < 0 && (uint64_t)-Offset >= Size)) {
       DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
                    << " which starts past the end of the " << AllocSize
@@ -660,11 +662,14 @@ private:
     bool Inserted = false;
     llvm::tie(PMI, Inserted)
       = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx));
-    if (!Inserted && Offsets.IsSplittable) {
+    if (Offsets.IsSplittable &&
+        (!Inserted || II.getRawSource() == II.getRawDest())) {
       // We've found a memory transfer intrinsic which refers to the alloca as
-      // both a source and dest. We refuse to split these to simplify splitting
-      // logic. If possible, SROA will still split them into separate allocas
-      // and then re-analyze.
+      // both a source and dest. This is detected either by direct equality of
+      // the operand values, or when we visit the intrinsic twice due to two
+      // different chains of values leading to it. We refuse to split these to
+      // simplify splitting logic. If possible, SROA will still split them into
+      // separate allocas and then re-analyze.
       Offsets.IsSplittable = false;
       P.Partitions[PMI->second].IsSplittable = false;
       P.Partitions[NewIdx].IsSplittable = false;
@@ -697,6 +702,9 @@ private:
     SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
     Visited.insert(Root);
     Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
+    // If there are no loads or stores, the access is dead. We mark that as
+    // a size zero access.
+    Size = 0;
     do {
       Instruction *I, *UsedI;
       llvm::tie(UsedI, I) = Uses.pop_back_val();
@@ -824,9 +832,9 @@ private:
   }

   void insertUse(Instruction &User, int64_t Offset, uint64_t Size) {
-    // If the use extends outside of the allocation, record it as a dead use
-    // for elimination later.
-    if ((uint64_t)Offset >= AllocSize ||
+    // If the use has a zero size or extends outside of the allocation, record
+    // it as a dead use for elimination later.
+    if (Size == 0 || (uint64_t)Offset >= AllocSize ||
         (Offset < 0 && (uint64_t)-Offset >= Size))
       return markAsDead(User);
@@ -853,7 +861,7 @@ private:
     PartitionUse NewUse(std::max(I->BeginOffset, BeginOffset),
                         std::min(I->EndOffset, EndOffset),
                         &User, cast<Instruction>(*U));
-    P.Uses[I - P.begin()].push_back(NewUse);
+    P.use_push_back(I, NewUse);
     if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser()))
       P.PHIOrSelectOpMap[std::make_pair(&User, U->get())]
         = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1);
@@ -1102,8 +1110,6 @@ AllocaPartitioning::AllocaPartitioning(const TargetData &TD, AllocaInst &AI)
   Uses.resize(Partitions.size());
   UseBuilder UB(TD, AI, *this);
   UB();
-  for (iterator I = Partitions.begin(), E = Partitions.end(); I != E; ++I)
-    std::stable_sort(use_begin(I), use_end(I));
 }

 Type *AllocaPartitioning::getCommonType(iterator I) const {
@@ -1890,7 +1896,8 @@ private:
   Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
                         uint64_t Offset) {
     assert(IntPromotionTy && "Alloca is not an integer we can extract from");
-    Value *V = IRB.CreateLoad(&NewAI, getName(".load"));
+    Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                     getName(".load"));
     assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
     uint64_t RelOffset = Offset - NewAllocaBeginOffset;
     if (RelOffset)
@@ -1906,7 +1913,7 @@ private:
   StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
     IntegerType *Ty = cast<IntegerType>(V->getType());
     if (Ty == IntPromotionTy)
-      return IRB.CreateStore(V, &NewAI);
+      return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());

     assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() &&
            "Cannot insert a larger integer!");
@@ -1918,10 +1925,12 @@ private:
     APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth())
                    .shl(RelOffset*8);
-    Value *Old = IRB.CreateAnd(IRB.CreateLoad(&NewAI, getName(".oldload")),
+    Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI,
+                                                     NewAI.getAlignment(),
+                                                     getName(".oldload")),
                                Mask, getName(".mask"));
-    return IRB.CreateStore(IRB.CreateOr(Old, V, getName(".insert")),
-                           &NewAI);
+    return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")),
+                                  &NewAI, NewAI.getAlignment());
   }

   void deleteIfTriviallyDead(Value *V) {
@@ -1943,12 +1952,12 @@ private:
     Value *Result;
     if (LI.getType() == VecTy->getElementType() ||
         BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
-      Result
-        = IRB.CreateExtractElement(IRB.CreateLoad(&NewAI, getName(".load")),
-                                   getIndex(IRB, BeginOffset),
-                                   getName(".extract"));
+      Result = IRB.CreateExtractElement(
+        IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
+        getIndex(IRB, BeginOffset), getName(".extract"));
     } else {
-      Result = IRB.CreateLoad(&NewAI, getName(".load"));
+      Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                     getName(".load"));
     }
     if (Result->getType() != LI.getType())
       Result = getValueCast(IRB, Result, LI.getType());
@@ -1983,6 +1992,9 @@ private:
     Value *NewPtr = getAdjustedAllocaPtr(IRB,
                                          LI.getPointerOperand()->getType());
     LI.setOperand(0, NewPtr);
+    if (LI.getAlignment())
+      LI.setAlignment(MinAlign(NewAI.getAlignment(),
+                               BeginOffset - NewAllocaBeginOffset));
     DEBUG(dbgs() << " to: " << LI << "\n");

     deleteIfTriviallyDead(OldOp);
@@ -1996,13 +2008,14 @@ private:
         BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) {
       if (V->getType() != ElementTy)
         V = getValueCast(IRB, V, ElementTy);
-      V = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V,
-                                  getIndex(IRB, BeginOffset),
+      LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+                                           getName(".load"));
+      V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset),
                                   getName(".insert"));
     } else if (V->getType() != VecTy) {
       V = getValueCast(IRB, V, VecTy);
     }
-    StoreInst *Store = IRB.CreateStore(V, &NewAI);
+    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
     Pass.DeadInsts.push_back(&SI);

     (void)Store;
@@ -2033,6 +2046,9 @@ private:
     Value *NewPtr = getAdjustedAllocaPtr(IRB,
                                          SI.getPointerOperand()->getType());
     SI.setOperand(1, NewPtr);
+    if (SI.getAlignment())
+      SI.setAlignment(MinAlign(NewAI.getAlignment(),
+                               BeginOffset - NewAllocaBeginOffset));
     DEBUG(dbgs() << " to: " << SI << "\n");

     deleteIfTriviallyDead(OldOp);
@@ -2048,6 +2064,15 @@ private:
     // pointer to the new alloca.
     if (!isa<Constant>(II.getLength())) {
       II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType()));
+
+      Type *CstTy = II.getAlignmentCst()->getType();
+      if (!NewAI.getAlignment())
+        II.setAlignment(ConstantInt::get(CstTy, 0));
+      else
+        II.setAlignment(
+          ConstantInt::get(CstTy, MinAlign(NewAI.getAlignment(),
+                                           BeginOffset - NewAllocaBeginOffset)));
+
       deleteIfTriviallyDead(OldPtr);
       return false;
     }
@@ -2067,11 +2092,15 @@ private:
          !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) {
       Type *SizeTy = II.getLength()->getType();
       Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset);
+      unsigned Align = 1;
+      if (NewAI.getAlignment())
+        Align = MinAlign(NewAI.getAlignment(),
+                         BeginOffset - NewAllocaBeginOffset);

       CallInst *New
         = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB,
                                                 II.getRawDest()->getType()),
-                           II.getValue(), Size, II.getAlignment(),
+                           II.getValue(), Size, Align,
                            II.isVolatile());
       (void)New;
       DEBUG(dbgs() << " to: " << *New << "\n");
@@ -2109,11 +2138,13 @@ private:
     // If this is an element-wide memset of a vectorizable alloca, insert it.
     if (VecTy && (BeginOffset > NewAllocaBeginOffset ||
                   EndOffset < NewAllocaEndOffset)) {
-      StoreInst *Store = IRB.CreateStore(
-        IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")), V,
-                                getIndex(IRB, BeginOffset),
+      StoreInst *Store = IRB.CreateAlignedStore(
+        IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI,
+                                                      NewAI.getAlignment(),
+                                                      getName(".load")),
+                                V, getIndex(IRB, BeginOffset),
                                 getName(".insert")),
-        &NewAI);
+        &NewAI, NewAI.getAlignment());
       (void)Store;
       DEBUG(dbgs() << " to: " << *Store << "\n");
       return true;
@@ -2131,7 +2162,8 @@ private:
       assert(V->getType() == VecTy);
     }

-    Value *New = IRB.CreateStore(V, &NewAI, II.isVolatile());
+    Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
+                                        II.isVolatile());
     (void)New;
     DEBUG(dbgs() << " to: " << *New << "\n");
     return !II.isVolatile();
@@ -2164,6 +2196,13 @@ private:
     else
       II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType()));

+    Type *CstTy = II.getAlignmentCst()->getType();
+    if (II.getAlignment() > 1)
+      II.setAlignment(ConstantInt::get(
+        CstTy, MinAlign(II.getAlignment(),
+                        MinAlign(NewAI.getAlignment(),
+                                 BeginOffset - NewAllocaBeginOffset))));
+
     DEBUG(dbgs() << " to: " << II << "\n");
     deleteIfTriviallyDead(OldOp);
     return false;
@@ -2221,6 +2260,11 @@ private:
     OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy,
                               getName("." + OtherPtr->getName()));
+    unsigned Align = II.getAlignment();
+    if (Align > 1)
+      Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(),
+                       MinAlign(II.getAlignment(), NewAI.getAlignment()));
+
     // Strip all inbounds GEPs and pointer casts to try to dig out any root
     // alloca that should be re-examined after rewriting this instruction.
     if (AllocaInst *AI
@@ -2236,8 +2280,7 @@ private:

       CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr,
                                        IsDest ? OtherPtr : OurPtr,
-                                       Size, II.getAlignment(),
-                                       II.isVolatile());
+                                       Size, Align, II.isVolatile());
       (void)New;
       DEBUG(dbgs() << " to: " << *New << "\n");
       return false;
@@ -2251,22 +2294,25 @@ private:
     Value *Src;
     if (IsVectorElement && !IsDest) {
       // We have to extract rather than load.
-      Src = IRB.CreateExtractElement(IRB.CreateLoad(SrcPtr,
-                                                    getName(".copyload")),
-                                     getIndex(IRB, BeginOffset),
-                                     getName(".copyextract"));
+      Src = IRB.CreateExtractElement(
+        IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")),
+        getIndex(IRB, BeginOffset),
+        getName(".copyextract"));
     } else {
-      Src = IRB.CreateLoad(SrcPtr, II.isVolatile(), getName(".copyload"));
+      Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
+                                  getName(".copyload"));
     }

     if (IsVectorElement && IsDest) {
       // We have to insert into a loaded copy before storing.
-      Src = IRB.CreateInsertElement(IRB.CreateLoad(&NewAI, getName(".load")),
-                                    Src, getIndex(IRB, BeginOffset),
-                                    getName(".insert"));
+      Src = IRB.CreateInsertElement(
+        IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")),
+        Src, getIndex(IRB, BeginOffset),
+        getName(".insert"));
     }

-    Value *Store = IRB.CreateStore(Src, DstPtr, II.isVolatile());
+    StoreInst *Store = cast<StoreInst>(
+      IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile()));
     (void)Store;
     DEBUG(dbgs() << " to: " << *Store << "\n");
     return !II.isVolatile();
@@ -2460,8 +2506,7 @@ private:
       else {
         AllocaPartitioning::PartitionUse OtherUse = *UI;
         OtherUse.User = Load;
-        P.use_insert(PI, std::upper_bound(UI, P.use_end(PI), OtherUse),
-                     OtherUse);
+        P.use_push_back(PI, OtherUse);
       }
     }
   }
@@ -2559,7 +2604,7 @@ private:
       LoadInst *OtherLoad = IsTrueVal ? FL : TL;
       assert(OtherUse.Ptr == OtherLoad->getOperand(0));
       OtherUse.User = OtherLoad;
-      P.use_insert(PI, P.use_end(PI), OtherUse);
+      P.use_push_back(PI, OtherUse);
     }

     // Transfer alignment and TBAA info if present.
@@ -2576,8 +2621,6 @@ private:
       LI->replaceAllUsesWith(V);
       Pass.DeadInsts.push_back(LI);
     }
-    if (PI != P.end())
-      std::stable_sort(P.use_begin(PI), P.use_end(PI));

     deleteIfTriviallyDead(OldPtr);
     return NewPtr == &NewAI;
@@ -2959,9 +3002,19 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI,
     assert(PI == P.begin() && "Begin offset is zero on later partition");
     NewAI = &AI;
   } else {
-    // FIXME: The alignment here is overly conservative -- we could in many
-    // cases get away with much weaker alignment constraints.
-    NewAI = new AllocaInst(AllocaTy, 0, AI.getAlignment(),
+    unsigned Alignment = AI.getAlignment();
+    if (!Alignment) {
+      // The minimum alignment which users can rely on when the explicit
+      // alignment is omitted or zero is that required by the ABI for this
+      // type.
+      Alignment = TD->getABITypeAlignment(AI.getAllocatedType());
+    }
+    Alignment = MinAlign(Alignment, PI->BeginOffset);
+    // If we will get at least this much alignment from the type alone, leave
+    // the alloca's alignment unconstrained.
+    if (Alignment <= TD->getABITypeAlignment(AllocaTy))
+      Alignment = 0;
+    NewAI = new AllocaInst(AllocaTy, 0, Alignment,
                            AI.getName() + ".sroa." + Twine(PI - P.begin()),
                            &AI);
     ++NumNewAllocas;
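The SROA diff bundles several fixes. Zero-size uses are now skipped or marked dead up front, and a pointer whose transitive users contain no load or store is given Size = 0 so the whole access is treated as dead. Memory transfers that name the alloca as both source and destination are now also caught when the two operands are literally equal, not only when the intrinsic is visited twice through different value chains. Partition use lists are built in order with use_push_back, letting the constructor and the PHI/select rewriting drop their stable_sort passes. The largest change threads alignment through rewriting: a replacement alloca receives the original alignment (or the ABI alignment when none was given) reduced by the partition's begin offset, and every rewritten load, store, memset and memcpy carries MinAlign(NewAI.getAlignment(), BeginOffset - NewAllocaBeginOffset) rather than silently inheriting the old alloca-wide guarantee. The arithmetic is "greatest power of two dividing both values"; a standalone sketch mirroring llvm::MinAlign from Support/MathExtras.h:

    #include <cassert>
    #include <cstdint>

    // Greatest power of two dividing both A and B. With B a byte offset,
    // this is the alignment a slice at that offset can still be assumed
    // to have when the base object is A-aligned.
    static uint64_t minAlign(uint64_t A, uint64_t B) {
      return (A | B) & (~(A | B) + 1); // lowest set bit of A | B
    }

    int main() {
      // A 16-byte-aligned alloca sliced at offset 4: only 4-byte alignment
      // survives, so a rewritten access must carry align 4.
      assert(minAlign(16, 4) == 4);
      // Slicing at a multiple of the alignment keeps the full 16 bytes.
      assert(minAlign(16, 32) == 16);
      return 0;
    }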