aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Transforms/Utils/Local.h7
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp75
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp2
-rw-r--r--test/Transforms/LoopSimplify/merge-exits.ll45
4 files changed, 128 insertions, 1 deletions
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 98a68f6461..dd423fa3b1 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -19,6 +19,7 @@ namespace llvm {
class User;
class BasicBlock;
+class BranchInst;
class Instruction;
class Value;
class Pass;
@@ -94,6 +95,12 @@ void MergeBasicBlockIntoOnlyPred(BasicBlock *BB);
///
bool SimplifyCFG(BasicBlock *BB);
+/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
+/// and if a predecessor branches to us and one of our successors, fold the
+/// setcc into the predecessor and use logical operations to pick the right
+/// destination.
+bool FoldBranchToCommonDest(BranchInst *BI);
+
/// DemoteRegToStack - This function takes a virtual register computed by an
/// Instruction and replaces it with a slot in the stack frame, allocated via
/// alloca. This allows the CFG to be changed around without fear of
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 03d273d25d..d03591081f 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -42,6 +42,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/SetOperations.h"
@@ -258,6 +259,80 @@ ReprocessLoop:
PN->eraseFromParent();
}
+ // If this loop has muliple exits and the exits all go to the same
+ // block, attempt to merge the exits. This helps several passes, such
+ // as LoopRotation, which do not support loops with multiple exits.
+ // SimplifyCFG also does this (and this code uses the same utility
+ // function), however this code is loop-aware, where SimplifyCFG is
+ // not. That gives it the advantage of being able to hoist
+ // loop-invariant instructions out of the way to open up more
+ // opportunities, and the disadvantage of having the responsibility
+ // to preserve dominator information.
+ if (ExitBlocks.size() > 1 && L->getUniqueExitBlock()) {
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
+ if (!ExitingBlock->getSinglePredecessor()) continue;
+ BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!BI || !BI->isConditional()) continue;
+ CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+ if (!CI || CI->getParent() != ExitingBlock) continue;
+
+ // Attempt to hoist out all instructions except for the
+ // comparison and the branch.
+ bool AllInvariant = true;
+ for (BasicBlock::iterator I = ExitingBlock->begin(),
+ E = ExitingBlock->end(); I != E; ) {
+ Instruction *Inst = I++;
+ if (Inst == BI || Inst == CI)
+ continue;
+ if (Inst->isTrapping()) {
+ AllInvariant = false;
+ break;
+ }
+ for (unsigned j = 0, f = Inst->getNumOperands(); j != f; ++j)
+ if (!L->isLoopInvariant(Inst->getOperand(j))) {
+ AllInvariant = false;
+ break;
+ }
+ if (!AllInvariant)
+ break;
+ // Hoist.
+ Inst->moveBefore(L->getLoopPreheader()->getTerminator());
+ }
+ if (!AllInvariant) continue;
+
+ // The block has now been cleared of all instructions except for
+ // a comparison and a conditional branch. SimplifyCFG may be able
+ // to fold it now.
+ if (!FoldBranchToCommonDest(BI)) continue;
+
+ // Success. The block is now dead, so remove it from the loop,
+ // update the dominator tree and dominance frontier, and delete it.
+ assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+ Changed = true;
+ L->removeBlockFromLoop(ExitingBlock);
+
+ DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
+ DomTreeNode *Node = DT->getNode(ExitingBlock);
+ const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+ Node->getChildren();
+ for (unsigned k = 0, g = Children.size(); k != g; ++k) {
+ DT->changeImmediateDominator(Children[k], Node->getIDom());
+ if (DF) DF->changeImmediateDominator(Children[k]->getBlock(),
+ Node->getIDom()->getBlock(),
+ DT);
+ }
+ DT->eraseNode(ExitingBlock);
+ if (DF) DF->removeBlock(ExitingBlock);
+
+ BI->getSuccessor(0)->removePredecessor(ExitingBlock);
+ BI->getSuccessor(1)->removePredecessor(ExitingBlock);
+ ExitingBlock->eraseFromParent();
+ }
+ }
+
return Changed;
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index ee0f6a65de..58d4d5a344 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1492,7 +1492,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
/// and if a predecessor branches to us and one of our successors, fold the
/// setcc into the predecessor and use logical operations to pick the right
/// destination.
-static bool FoldBranchToCommonDest(BranchInst *BI) {
+bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
BasicBlock *BB = BI->getParent();
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
if (Cond == 0) return false;
diff --git a/test/Transforms/LoopSimplify/merge-exits.ll b/test/Transforms/LoopSimplify/merge-exits.ll
new file mode 100644
index 0000000000..c5bf7fdc3c
--- /dev/null
+++ b/test/Transforms/LoopSimplify/merge-exits.ll
@@ -0,0 +1,45 @@
+; RUN: llvm-as < %s | opt -loopsimplify -loop-rotate -instcombine -indvars \
+; RUN: | llvm-dis > %t
+; RUN: not grep sext %t
+; RUN: grep {phi i64} %t | count 1
+
+; Loopsimplify should be able to merge the two loop exits
+; into one, so that loop rotate can rotate the loop, so
+; that indvars can promote the induction variable to i64
+; without needing casts.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define float @t(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
+entry:
+ %t0 = load float* %peakWeight, align 4 ; <float> [#uses=1]
+ br label %bb1
+
+bb: ; preds = %bb2
+ %t1 = sext i32 %hiPart.0 to i64 ; <i64> [#uses=1]
+ %t2 = getelementptr float* %pTmp1, i64 %t1 ; <float*> [#uses=1]
+ %t3 = load float* %t2, align 4 ; <float> [#uses=1]
+ %t4 = fadd float %t3, %distERBhi.0 ; <float> [#uses=1]
+ %t5 = add i32 %hiPart.0, 1 ; <i32> [#uses=2]
+ %t6 = sext i32 %t5 to i64 ; <i64> [#uses=1]
+ %t7 = getelementptr float* %peakWeight, i64 %t6 ; <float*> [#uses=1]
+ %t8 = load float* %t7, align 4 ; <float> [#uses=1]
+ %t9 = fadd float %t8, %peakCount.0 ; <float> [#uses=1]
+ br label %bb1
+
+bb1: ; preds = %bb, %entry
+ %peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ] ; <float> [#uses=2]
+ %hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ] ; <i32> [#uses=3]
+ %distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ] ; <float> [#uses=3]
+ %t10 = fcmp uge float %distERBhi.0, 2.500000e+00 ; <i1> [#uses=1]
+ br i1 %t10, label %bb3, label %bb2
+
+bb2: ; preds = %bb1
+ %t11 = add i32 %bandEdgeIndex, -1 ; <i32> [#uses=1]
+ %t12 = icmp sgt i32 %t11, %hiPart.0 ; <i1> [#uses=1]
+ br i1 %t12, label %bb, label %bb3
+
+bb3: ; preds = %bb2, %bb1
+ %t13 = fdiv float %peakCount.0, %distERBhi.0 ; <float> [#uses=1]
+ ret float %t13
+}