diff options
author | Chris Lattner <sabre@nondot.org> | 2011-01-23 22:04:55 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2011-01-23 22:04:55 +0000 |
commit | c87c50a39c1bc27437352feee0f6aba2d50fa1b5 (patch) | |
tree | 6703aad0252af11e7c6e9f92c99267bcdf99c431 | |
parent | 3928af6ac47f9abef7dff32823a5fd41743c8fbc (diff) |
Enhance SRoA to promote allocas that are used by selects in some
common cases. This triggers a surprising number of times in SPEC2K6
because min/max idioms end up doing this. For example, code from the
STL ends up looking like this to SRoA:
%202 = load i64* %__old_size, align 8, !tbaa !3
%203 = load i64* %__old_size, align 8, !tbaa !3
%204 = load i64* %__n, align 8, !tbaa !3
%205 = icmp ult i64 %203, %204
%storemerge.i = select i1 %205, i64* %__n, i64* %__old_size
%206 = load i64* %storemerge.i, align 8, !tbaa !3
We can now promote both the __n and the __old_size allocas.
This addresses another chunk of rdar://7339113, poor codegen on
stringswitch.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@124088 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Scalar/ScalarReplAggregates.cpp | 133 | ||||
-rw-r--r-- | test/Transforms/ScalarRepl/phi-select.ll | 62 |
2 files changed, 190 insertions, 5 deletions
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 6b79695d6f..60302f7794 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -31,6 +31,7 @@ #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -43,12 +44,14 @@ #include "llvm/Support/IRBuilder.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumReplaced, "Number of allocas broken up"); STATISTIC(NumPromoted, "Number of allocas promoted"); +STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); STATISTIC(NumConverted, "Number of aggregates converted to scalar"); STATISTIC(NumGlobals, "Number of allocas copied from constant global"); @@ -862,6 +865,134 @@ public: }; } // end anon namespace +/// isSafeSelectToSpeculate - Select instructions that use an alloca and are +/// subsequently loaded can be rewritten to load both input pointers and then +/// select between the result, allowing the load of the alloca to be promoted. +/// From this: +/// %P2 = select i1 %cond, i32* %Alloca, i32* %Other +/// %V = load i32* %P2 +/// to: +/// %V1 = load i32* %Alloca -> will be mem2reg'd +/// %V2 = load i32* %Other +/// %V = select i1 %cond, i32 %V1, i32* %V2 +/// +/// We can do this to a select if its only uses are loads and if the operand to +/// the select can be loaded unconditionally. +static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) { + bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(); + bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(); + + for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); + UI != UE; ++UI) { + LoadInst *LI = dyn_cast<LoadInst>(*UI); + if (LI == 0 || LI->isVolatile()) return false; + + // Both operands to the select need to be deferencable, either absolutely + // (e.g. allocas) or at this point because we can see other accesses to it. + if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI, + LI->getAlignment(), TD)) + return false; + if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI, + LI->getAlignment(), TD)) + return false; + } + + return true; +} + + +/// tryToMakeAllocaBePromotable - This returns true if the alloca only has +/// direct (non-volatile) loads and stores to it. If the alloca is close but +/// not quite there, this will transform the code to allow promotion. As such, +/// it is a non-pure predicate. +static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { + SetVector<Instruction*, SmallVector<Instruction*, 4>, + SmallPtrSet<Instruction*, 4> > InstsToRewrite; + + for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) { + User *U = *UI; + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + if (LI->isVolatile()) + return false; + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (SI->getOperand(0) == AI || SI->isVolatile()) + return false; // Don't allow a store OF the AI, only INTO the AI. + continue; + } + + if (SelectInst *SI = dyn_cast<SelectInst>(U)) { + // If the condition being selected on is a constant, fold the select, yes + // this does (rarely) happen early on. + if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) { + Value *Result = SI->getOperand(1+CI->isZero()); + SI->replaceAllUsesWith(Result); + SI->eraseFromParent(); + + // This is very rare and we just scrambled the use list of AI, start + // over completely. + return tryToMakeAllocaBePromotable(AI, TD); + } + + // If it is safe to turn "load (select c, AI, ptr)" into a select of two + // loads, then we can transform this by rewriting the select. + if (!isSafeSelectToSpeculate(SI, TD)) + return false; + + InstsToRewrite.insert(SI); + continue; + } + + return false; + } + + // If there are no instructions to rewrite, then all uses are load/stores and + // we're done! + if (InstsToRewrite.empty()) + return true; + + // If we have instructions that need to be rewritten for this to be promotable + // take care of it now. + for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) { + // Selects in InstsToRewrite only have load uses. Rewrite each as two + // loads with a new select. + SelectInst *SI = cast<SelectInst>(InstsToRewrite[i]); + + for (Value::use_iterator UI = SI->use_begin(), E = SI->use_end(); UI != E;){ + LoadInst *LI = cast<LoadInst>(*UI++); + + IRBuilder<> Builder(LI); + LoadInst *TrueLoad = + Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t"); + LoadInst *FalseLoad = + Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t"); + + // Transfer alignment and TBAA info if present. + TrueLoad->setAlignment(LI->getAlignment()); + FalseLoad->setAlignment(LI->getAlignment()); + if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) { + TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag); + } + + Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad); + V->takeName(LI); + LI->replaceAllUsesWith(V); + LI->eraseFromParent(); + } + + // Now that all the loads are gone, the select is gone too. + SI->eraseFromParent(); + } + + ++NumAdjusted; + return true; +} + + bool SROA::performPromotion(Function &F) { std::vector<AllocaInst*> Allocas; DominatorTree *DT = 0; @@ -879,7 +1010,7 @@ bool SROA::performPromotion(Function &F) { // the entry node for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? - if (isAllocaPromotable(AI)) + if (tryToMakeAllocaBePromotable(AI, TD)) Allocas.push_back(AI); if (Allocas.empty()) break; diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll index 06ee208151..f612ed55a2 100644 --- a/test/Transforms/ScalarRepl/phi-select.ll +++ b/test/Transforms/ScalarRepl/phi-select.ll @@ -44,16 +44,15 @@ F: } ; CHECK: @test3 -; CHECK: %A.0 = alloca i32 -; CHECK: %A.1 = alloca i32 +; CHECK-NEXT: %Q = select i1 %c, i32 1, i32 2 +; CHECK-NEXT: ret i32 %Q ; rdar://8904039 define i32 @test3(i1 %c) { -entry: %A = alloca {i32, i32} %B = getelementptr {i32, i32}* %A, i32 0, i32 0 store i32 1, i32* %B %C = getelementptr {i32, i32}* %A, i32 0, i32 1 - store i32 2, i32* %B + store i32 2, i32* %C %X = select i1 %c, i32* %B, i32* %C %Q = load i32* %X @@ -76,3 +75,58 @@ entry: %Q = load i64* %Y ret i64 %Q } + + +;; +;; Tests for promoting allocas used by selects. +;; rdar://7339113 +;; + +define i32 @test5(i32 *%P) nounwind readnone ssp { +entry: + %b = alloca i32, align 8 + store i32 2, i32* %b, align 8 + + ;; Select on constant condition should be folded. + %p.0 = select i1 false, i32* %b, i32* %P + store i32 123, i32* %p.0 + + %r = load i32* %b, align 8 + ret i32 %r + +; CHECK: @test5 +; CHECK: store i32 123, i32* %P +; CHECK: ret i32 2 +} + +define i32 @test6(i32 %x, i1 %c) nounwind readnone ssp { + %a = alloca i32, align 8 + %b = alloca i32, align 8 + store i32 1, i32* %a, align 8 + store i32 2, i32* %b, align 8 + %p.0 = select i1 %c, i32* %b, i32* %a + %r = load i32* %p.0, align 8 + ret i32 %r +; CHECK: @test6 +; CHECK-NEXT: %r = select i1 %c, i32 2, i32 1 +; CHECK-NEXT: ret i32 %r +} + +; Verify that the loads happen where the loads are, not where the select is. +define i32 @test7(i32 %x, i1 %c) nounwind readnone ssp { + %a = alloca i32, align 8 + %b = alloca i32, align 8 + store i32 1, i32* %a + store i32 2, i32* %b + %p.0 = select i1 %c, i32* %b, i32* %a + + store i32 0, i32* %a + + %r = load i32* %p.0, align 8 + ret i32 %r +; CHECK: @test7 +; CHECK-NOT: alloca i32 +; CHECK: %r = select i1 %c, i32 2, i32 0 +; CHECK: ret i32 %r +} + |