diff options
-rw-r--r-- | include/llvm/Transforms/Scalar.h | 2 | ||||
-rw-r--r-- | lib/Transforms/IPO/PassManagerBuilder.cpp | 7 | ||||
-rw-r--r-- | lib/Transforms/Scalar/SROA.cpp | 174 | ||||
-rw-r--r-- | test/Transforms/SROA/basictest.ll | 14 |
4 files changed, 182 insertions, 15 deletions
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 18114ce293..a5d8eed746 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -72,7 +72,7 @@ FunctionPass *createAggressiveDCEPass(); // // SROA - Replace aggregates or pieces of aggregates with scalar SSA values. // -FunctionPass *createSROAPass(); +FunctionPass *createSROAPass(bool RequiresDomTree = true); //===----------------------------------------------------------------------===// // diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 0d15870f01..105b2886d9 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -41,7 +41,7 @@ UseGVNAfterVectorization("use-gvn-after-vectorization", cl::desc("Run GVN instead of Early CSE after vectorization passes")); static cl::opt<bool> UseNewSROA("use-new-sroa", - cl::init(false), cl::Hidden, + cl::init(true), cl::Hidden, cl::desc("Enable the new, experimental SROA pass")); PassManagerBuilder::PassManagerBuilder() { @@ -154,7 +154,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { // Start of function pass. // Break up aggregate allocas, using SSAUpdater. - MPM.add(createScalarReplAggregatesPass(-1, false)); + if (UseNewSROA) + MPM.add(createSROAPass(/*RequiresDomTree*/ false)); + else + MPM.add(createScalarReplAggregatesPass(-1, false)); MPM.add(createEarlyCSEPass()); // Catch trivial redundancies if (!DisableSimplifyLibCalls) MPM.add(createSimplifyLibCallsPass()); // Library Call Optimizations diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 9dcf12d2f3..6691059d35 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -47,6 +47,7 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" @@ -67,6 +68,11 @@ STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion"); STATISTIC(NumDeleted, "Number of instructions deleted"); STATISTIC(NumVectorized, "Number of vectorized aggregates"); +/// Hidden option to force the pass to not use DomTree and mem2reg, instead +/// forming SSA values through the SSAUpdater infrastructure. +static cl::opt<bool> +ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden); + namespace { /// \brief Alloca partitioning representation. /// @@ -1115,6 +1121,90 @@ void AllocaPartitioning::dump() const { print(dbgs()); } namespace { +/// \brief Implementation of LoadAndStorePromoter for promoting allocas. +/// +/// This subclass of LoadAndStorePromoter adds overrides to handle promoting +/// the loads and stores of an alloca instruction, as well as updating its +/// debug information. This is used when a domtree is unavailable and thus +/// mem2reg in its full form can't be used to handle promotion of allocas to +/// scalar values. +class AllocaPromoter : public LoadAndStorePromoter { + AllocaInst &AI; + DIBuilder &DIB; + + SmallVector<DbgDeclareInst *, 4> DDIs; + SmallVector<DbgValueInst *, 4> DVIs; + +public: + AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, + AllocaInst &AI, DIBuilder &DIB) + : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {} + + void run(const SmallVectorImpl<Instruction*> &Insts) { + // Remember which alloca we're promoting (for isInstInList). + if (MDNode *DebugNode = MDNode::getIfExists(AI.getContext(), &AI)) { + for (Value::use_iterator UI = DebugNode->use_begin(), + UE = DebugNode->use_end(); + UI != UE; ++UI) + if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) + DDIs.push_back(DDI); + else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI)) + DVIs.push_back(DVI); + } + + LoadAndStorePromoter::run(Insts); + AI.eraseFromParent(); + while (!DDIs.empty()) + DDIs.pop_back_val()->eraseFromParent(); + while (!DVIs.empty()) + DVIs.pop_back_val()->eraseFromParent(); + } + + virtual bool isInstInList(Instruction *I, + const SmallVectorImpl<Instruction*> &Insts) const { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->getOperand(0) == &AI; + return cast<StoreInst>(I)->getPointerOperand() == &AI; + } + + virtual void updateDebugInfo(Instruction *Inst) const { + for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(), + E = DDIs.end(); I != E; ++I) { + DbgDeclareInst *DDI = *I; + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) + ConvertDebugDeclareToDebugValue(DDI, LI, DIB); + } + for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(), + E = DVIs.end(); I != E; ++I) { + DbgValueInst *DVI = *I; + Value *Arg = NULL; + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + // If an argument is zero extended then use argument directly. The ZExt + // may be zapped by an optimization pass in future. + if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0))) + Arg = dyn_cast<Argument>(ZExt->getOperand(0)); + if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0))) + Arg = dyn_cast<Argument>(SExt->getOperand(0)); + if (!Arg) + Arg = SI->getOperand(0); + } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + Arg = LI->getOperand(0); + } else { + continue; + } + Instruction *DbgVal = + DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()), + Inst); + DbgVal->setDebugLoc(DVI->getDebugLoc()); + } + } +}; +} // end anon namespace + + +namespace { /// \brief An optimization pass providing Scalar Replacement of Aggregates. /// /// This pass takes allocations which can be completely analyzed (that is, they @@ -1134,6 +1224,8 @@ namespace { /// this form. By doing so, it will enable promotion of vector aggregates to /// SSA vector values. class SROA : public FunctionPass { + const bool RequiresDomTree; + LLVMContext *C; const TargetData *TD; DominatorTree *DT; @@ -1160,7 +1252,9 @@ class SROA : public FunctionPass { std::vector<AllocaInst *> PromotableAllocas; public: - SROA() : FunctionPass(ID), C(0), TD(0), DT(0) { + SROA(bool RequiresDomTree = true) + : FunctionPass(ID), RequiresDomTree(RequiresDomTree), + C(0), TD(0), DT(0) { initializeSROAPass(*PassRegistry::getPassRegistry()); } bool runOnFunction(Function &F); @@ -1179,13 +1273,14 @@ private: bool splitAlloca(AllocaInst &AI, AllocaPartitioning &P); bool runOnAlloca(AllocaInst &AI); void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas); + bool promoteAllocas(Function &F); }; } char SROA::ID = 0; -FunctionPass *llvm::createSROAPass() { - return new SROA(); +FunctionPass *llvm::createSROAPass(bool RequiresDomTree) { + return new SROA(RequiresDomTree); } INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates", @@ -2598,6 +2693,66 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { } } +/// \brief Promote the allocas, using the best available technique. +/// +/// This attempts to promote whatever allocas have been identified as viable in +/// the PromotableAllocas list. If that list is empty, there is nothing to do. +/// If there is a domtree available, we attempt to promote using the full power +/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is +/// based on the SSAUpdater utilities. This function returns whether any +/// promotion occured. +bool SROA::promoteAllocas(Function &F) { + if (PromotableAllocas.empty()) + return false; + + NumPromoted += PromotableAllocas.size(); + + if (DT && !ForceSSAUpdater) { + DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); + PromoteMemToReg(PromotableAllocas, *DT); + PromotableAllocas.clear(); + return true; + } + + DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n"); + SSAUpdater SSA; + DIBuilder DIB(*F.getParent()); + SmallVector<Instruction*, 64> Insts; + + for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) { + AllocaInst *AI = PromotableAllocas[Idx]; + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE;) { + Instruction *I = cast<Instruction>(*UI++); + // FIXME: Currently the SSAUpdater infrastructure doesn't reason about + // lifetime intrinsics and so we strip them (and the bitcasts+GEPs + // leading to them) here. Eventually it should use them to optimize the + // scalar values produced. + if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { + assert(onlyUsedByLifetimeMarkers(I) && + "Found a bitcast used outside of a lifetime marker."); + while (!I->use_empty()) + cast<Instruction>(*I->use_begin())->eraseFromParent(); + I->eraseFromParent(); + continue; + } + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + assert(II->getIntrinsicID() == Intrinsic::lifetime_start || + II->getIntrinsicID() == Intrinsic::lifetime_end); + II->eraseFromParent(); + continue; + } + + Insts.push_back(I); + } + AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts); + Insts.clear(); + } + + PromotableAllocas.clear(); + return true; +} + namespace { /// \brief A predicate to test whether an alloca belongs to a set. class IsAllocaInSet { @@ -2618,7 +2773,7 @@ bool SROA::runOnFunction(Function &F) { DEBUG(dbgs() << " Skipping SROA -- no target data!\n"); return false; } - DT = &getAnalysis<DominatorTree>(); + DT = getAnalysisIfAvailable<DominatorTree>(); BasicBlock &EntryBB = F.getEntryBlock(); for (BasicBlock::iterator I = EntryBB.begin(), E = llvm::prior(EntryBB.end()); @@ -2643,18 +2798,13 @@ bool SROA::runOnFunction(Function &F) { } } - if (!PromotableAllocas.empty()) { - DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); - PromoteMemToReg(PromotableAllocas, *DT); - Changed = true; - NumPromoted += PromotableAllocas.size(); - PromotableAllocas.clear(); - } + Changed |= promoteAllocas(F); return Changed; } void SROA::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<DominatorTree>(); + if (RequiresDomTree) + AU.addRequired<DominatorTree>(); AU.setPreservesCFG(); } diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll index e6a34857ad..95e4a78443 100644 --- a/test/Transforms/SROA/basictest.ll +++ b/test/Transforms/SROA/basictest.ll @@ -1,6 +1,10 @@ ; RUN: opt < %s -sroa -S | FileCheck %s +; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" +declare void @llvm.lifetime.start(i64, i8* nocapture) +declare void @llvm.lifetime.end(i64, i8* nocapture) + define i32 @test0() { ; CHECK: @test0 ; CHECK-NOT: alloca @@ -10,14 +14,24 @@ entry: %a1 = alloca i32 %a2 = alloca float + %a1.i8 = bitcast i32* %a1 to i8* + call void @llvm.lifetime.start(i64 4, i8* %a1.i8) + store i32 0, i32* %a1 %v1 = load i32* %a1 + call void @llvm.lifetime.end(i64 4, i8* %a1.i8) + + %a2.i8 = bitcast float* %a2 to i8* + call void @llvm.lifetime.start(i64 4, i8* %a2.i8) + store float 0.0, float* %a2 %v2 = load float * %a2 %v2.int = bitcast float %v2 to i32 %sum1 = add i32 %v1, %v2.int + call void @llvm.lifetime.end(i64 4, i8* %a2.i8) + ret i32 %sum1 } |