aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp93
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll91
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll84
-rw-r--r--test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll138
4 files changed, 395 insertions, 11 deletions
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 301791a90d..a2d0e98cd3 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -71,7 +71,9 @@ namespace {
// LoopProcessWorklist - Used to check if second loop needs processing
// after RewriteLoopBodyWithConditionConstant rewrites first loop.
std::vector<Loop*> LoopProcessWorklist;
- SmallPtrSet<Value *,8> UnswitchedVals;
+
+ // FIXME: Consider custom class for this.
+ std::map<const SwitchInst*, SmallPtrSet<const Value *,8> > UnswitchedVals;
bool OptimizeForSize;
bool redoLoop;
@@ -117,7 +119,15 @@ namespace {
private:
virtual void releaseMemory() {
- UnswitchedVals.clear();
+ // We need to forget about all switches in the current loop.
+ // FIXME: Do it better than enumerating all blocks of code
+ // and see if it is a switch instruction.
+ for (Loop::block_iterator I = currentLoop->block_begin(),
+ E = currentLoop->block_end(); I != E; ++I) {
+ SwitchInst* SI = dyn_cast<SwitchInst>((*I)->getTerminator());
+ if (SI)
+ UnswitchedVals.erase(SI);
+ }
}
/// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
@@ -128,6 +138,12 @@ namespace {
if (I != LoopProcessWorklist.end())
LoopProcessWorklist.erase(I);
}
+
+ /// For new loop switches we clone info about values that was
+ /// already unswitched and has redundant successors.
+ /// Note, that new loop data is stored inside the VMap.
+ void CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+ const BasicBlock* SrcBB);
void initLoopData() {
loopHeader = currentLoop->getHeader();
@@ -255,13 +271,25 @@ bool LoopUnswitch::processCurrentLoop() {
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Value *LoopCond = FindLIVLoopCondition(SI->getCondition(),
currentLoop, Changed);
- if (LoopCond && SI->getNumCases() > 1) {
+ unsigned NumCases = SI->getNumCases();
+ if (LoopCond && NumCases > 1) {
// Find a value to unswitch on:
// FIXME: this should chose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = SI->getCaseValue(1);
+ Constant *UnswitchVal = NULL;
+
// Do not process same value again and again.
- if (!UnswitchedVals.insert(UnswitchVal))
+ // At this point we have some cases already unswitched and
+ // some not yet unswitched. Let's find the first not yet unswitched one.
+ for (unsigned i = 1; i < NumCases; ++i) {
+ Constant* UnswitchValCandidate = SI->getCaseValue(i);
+ if (!UnswitchedVals[SI].count(UnswitchValCandidate)) {
+ UnswitchVal = UnswitchValCandidate;
+ break;
+ }
+ }
+
+ if (!UnswitchVal)
continue;
if (UnswitchIfProfitable(LoopCond, UnswitchVal)) {
@@ -287,6 +315,23 @@ bool LoopUnswitch::processCurrentLoop() {
return Changed;
}
+/// For new loop switches we clone info about values that was
+/// already unswitched and has redundant successors.
+/// Not that new loop data is stored inside the VMap.
+void LoopUnswitch::CloneUnswitchedVals(const ValueToValueMapTy& VMap,
+ const BasicBlock* SrcBB) {
+
+ const SwitchInst* SI = dyn_cast<SwitchInst>(SrcBB->getTerminator());
+ if (SI && UnswitchedVals.count(SI)) {
+ // Don't clone a totally simplified switch.
+ if (isa<Constant>(SI->getCondition()))
+ return;
+ Value* I = VMap.lookup(SI);
+ assert(I && "All instructions that are in SrcBB must be in VMap.");
+ UnswitchedVals[cast<SwitchInst>(I)] = UnswitchedVals[SI];
+ }
+}
+
/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
/// loop with no side effects (including infinite loops).
///
@@ -378,14 +423,25 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
// Check to see if a successor of the switch is guaranteed to go to the
// latch block or exit through a one exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
- // this. Note that we can't trivially unswitch on the default case.
- for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i)
- if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
+ // this.
+ // Note that we can't trivially unswitch on the default case or
+ // on already unswitched cases.
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock* LoopExitCandidate;
+ if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop,
SI->getSuccessor(i)))) {
// Okay, we found a trivial case, remember the value that is trivial.
- if (Val) *Val = SI->getCaseValue(i);
+ ConstantInt* CaseVal = SI->getCaseValue(i);
+
+ // Check that it was not unswitched before, since already unswitched
+ // trivial vals are looks trivial too.
+ if (UnswitchedVals[SI].count(CaseVal))
+ continue;
+ LoopExitBB = LoopExitCandidate;
+ if (Val) *Val = CaseVal;
break;
}
+ }
}
// If we didn't find a single unique LoopExit block, or if the loop exit block
@@ -447,8 +503,14 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// expansion, and the number of basic blocks, to avoid loops with
// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.
- if (Metrics.NumInsts > Threshold ||
- Metrics.NumBlocks * 5 > Threshold ||
+
+ unsigned NumUnswitched =
+ (NumSwitches + NumBranches) + 1 /*take in account current iteration*/;
+
+ unsigned NumInsts = Metrics.NumInsts * NumUnswitched;
+ unsigned NumBlocks = Metrics.NumBlocks * NumUnswitched;
+
+ if (NumInsts > Threshold || NumBlocks * 5 > Threshold ||
Metrics.containsIndirectBr || Metrics.isRecursive) {
DEBUG(dbgs() << "NOT unswitching loop %"
<< currentLoop->getHeader()->getName() << ", cost too high: "
@@ -620,6 +682,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
ValueToValueMapTy VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
+
+ // Inherit simplified switches info for NewBB
+ // We needn't pass NewBB since its instructions are already contained
+ // inside the VMap.
+ CloneUnswitchedVals(VMap, LoopBlocks[i]);
+
NewBlocks.push_back(NewBB);
VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
@@ -945,6 +1013,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
BasicBlock *Switch = SI->getParent();
BasicBlock *SISucc = SI->getSuccessor(DeadCase);
BasicBlock *Latch = L->getLoopLatch();
+
+ UnswitchedVals[SI].insert(Val);
+
if (!SI->findCaseDest(SISucc)) continue; // Edge is critical.
// If the DeadCase successor dominates the loop latch, then the
// transformation isn't safe since it will delete the sole predecessor edge
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
new file mode 100644
index 0000000000..8389fe4643
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -0,0 +1,91 @@
+; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info %s | FileCheck %s
+
+; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
+; STATS: 2 loop-unswitch - Number of switches unswitched
+
+; CHECK: %1 = icmp eq i32 %c, 1
+; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK: ..split_crit_edge: ; preds = %0
+; CHECK-NEXT: br label %.split
+
+; CHECK: .split.us: ; preds = %0
+; CHECK-NEXT: br label %loop_begin.us
+
+; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us
+; CHECK-NEXT: %var_val.us = load i32* %var
+; CHECK-NEXT: switch i32 1, label %default.us-lcssa.us [
+; CHECK-NEXT: i32 1, label %inc.us
+
+; CHECK: inc.us: ; preds = %loop_begin.us
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us
+
+; CHECK: .split: ; preds = %..split_crit_edge
+; CHECK-NEXT: %2 = icmp eq i32 %c, 2
+; CHECK-NEXT: br i1 %2, label %.split.split.us, label %.split..split.split_crit_edge
+
+; CHECK: .split..split.split_crit_edge: ; preds = %.split
+; CHECK-NEXT: br label %.split.split
+
+; CHECK: .split.split.us: ; preds = %.split
+; CHECK-NEXT: br label %loop_begin.us1
+
+; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us5, %.split.split.us
+; CHECK-NEXT: %var_val.us2 = load i32* %var
+; CHECK-NEXT: switch i32 2, label %default.us-lcssa.us-lcssa.us [
+; CHECK-NEXT: i32 1, label %inc.us3
+; CHECK-NEXT: i32 2, label %dec.us4
+; CHECK-NEXT: ]
+
+; CHECK: dec.us4: ; preds = %loop_begin.us1
+; CHECK-NEXT: call void @decf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us5
+
+; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
+; CHECK-NEXT: br label %loop_begin
+
+; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split
+; CHECK-NEXT: %var_val = load i32* %var
+; CHECK-NEXT: switch i32 %c, label %default.us-lcssa.us-lcssa [
+; CHECK-NEXT: i32 1, label %inc
+; CHECK-NEXT: i32 2, label %dec
+; CHECK-NEXT: ]
+
+; CHECK: inc: ; preds = %loop_begin
+; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa, label %inc.split
+
+; CHECK: dec: ; preds = %loop_begin
+; CHECK-NEXT: br i1 true, label %us-unreachable6, label %dec.split
+
+define i32 @test(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32* %var
+
+ switch i32 %c, label %default [
+ i32 1, label %inc
+ i32 2, label %dec
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+dec:
+ call void @decf() noreturn nounwind
+ br label %loop_begin
+default:
+ br label %loop_exit
+loop_exit:
+ ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
new file mode 100644
index 0000000000..21c0ec3e22
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -0,0 +1,84 @@
+; RUN: opt -loop-unswitch -loop-unswitch-threshold 30 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 30 -verify-loop-info -verify-dom-info %s | FileCheck %s
+
+; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
+; STATS: 1 loop-unswitch - Number of switches unswitched
+
+; ModuleID = '../llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll'
+
+; CHECK: %1 = icmp eq i32 %c, 1
+; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK: ..split_crit_edge: ; preds = %0
+; CHECK-NEXT: br label %.split
+
+; CHECK: .split.us: ; preds = %0
+; CHECK-NEXT: br label %loop_begin.us
+
+; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us
+; CHECK: switch i32 1, label %second_switch.us [
+; CHECK-NEXT: i32 1, label %inc.us
+
+; CHECK: inc.us: ; preds = %second_switch.us, %loop_begin.us
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us
+
+; CHECK: second_switch.us: ; preds = %loop_begin.us
+; CHECK-NEXT: switch i32 %d, label %default.us [
+; CHECK-NEXT: i32 1, label %inc.us
+; CHECK-NEXT: ]
+
+; CHECK: .split: ; preds = %..split_crit_edge
+; CHECK-NEXT: br label %loop_begin
+
+; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split
+; CHECK: switch i32 %c, label %second_switch [
+; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge
+; CHECK-NEXT: ]
+
+; CHECK: loop_begin.inc_crit_edge: ; preds = %loop_begin
+; CHECK-NEXT: br i1 true, label %us-unreachable, label %inc
+
+; CHECK: second_switch: ; preds = %loop_begin
+; CHECK-NEXT: switch i32 %d, label %default [
+; CHECK-NEXT: i32 1, label %inc
+; CHECK-NEXT: ]
+
+; CHECK: inc: ; preds = %loop_begin.inc_crit_edge, %second_switch
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge
+
+define i32 @test(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32* %mem
+ %d = load i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32* %var
+
+ switch i32 %c, label %second_switch [
+ i32 1, label %inc
+ ]
+
+second_switch:
+ switch i32 %d, label %default [
+ i32 1, label %inc
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+
+default:
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
new file mode 100644
index 0000000000..1b186d6bec
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -0,0 +1,138 @@
+; RUN: opt -loop-unswitch -loop-unswitch-threshold 1000 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s
+; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info %s | FileCheck %s
+
+; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted
+; STATS: 3 loop-unswitch - Number of switches unswitched
+
+; CHECK: %1 = icmp eq i32 %c, 1
+; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge
+
+; CHECK: ..split_crit_edge: ; preds = %0
+; CHECK-NEXT: br label %.split
+
+; CHECK: .split.us: ; preds = %0
+; CHECK-NEXT: %2 = icmp eq i32 %d, 1
+; CHECK-NEXT: br i1 %2, label %.split.us.split.us, label %.split.us..split.us.split_crit_edge
+
+; CHECK: .split.us..split.us.split_crit_edge: ; preds = %.split.us
+; CHECK-NEXT: br label %.split.us.split
+
+; CHECK: .split.us.split.us: ; preds = %.split.us
+; CHECK-NEXT: br label %loop_begin.us.us
+
+; CHECK: loop_begin.us.us: ; preds = %loop_begin.backedge.us.us, %.split.us.split.us
+; CHECK-NEXT: %var_val.us.us = load i32* %var
+; CHECK-NEXT: switch i32 1, label %second_switch.us.us [
+; CHECK-NEXT: i32 1, label %inc.us.us
+
+; CHECK: inc.us.us: ; preds = %second_switch.us.us, %loop_begin.us.us
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us.us
+
+; CHECK: second_switch.us.us: ; preds = %loop_begin.us.us
+; CHECK-NEXT: switch i32 1, label %default.us.us [
+; CHECK-NEXT: i32 1, label %inc.us.us
+
+; CHECK: .split.us.split: ; preds = %.split.us..split.us.split_crit_edge
+; CHECK-NEXT: br label %loop_begin.us
+
+; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us.split
+; CHECK-NEXT: %var_val.us = load i32* %var
+; CHECK-NEXT: switch i32 1, label %second_switch.us [
+; CHECK-NEXT: i32 1, label %inc.us
+
+; CHECK: inc.us: ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us
+
+; CHECK: second_switch.us: ; preds = %loop_begin.us
+; CHECK-NEXT: switch i32 %d, label %default.us [
+; CHECK-NEXT: i32 1, label %second_switch.us.inc.us_crit_edge
+; CHECK-NEXT: ]
+
+; CHECK: second_switch.us.inc.us_crit_edge: ; preds = %second_switch.us
+; CHECK-NEXT: br i1 true, label %us-unreachable8, label %inc.us
+
+; CHECK: .split: ; preds = %..split_crit_edge
+; CHECK-NEXT: %3 = icmp eq i32 %d, 1
+; CHECK-NEXT: br i1 %3, label %.split.split.us, label %.split..split.split_crit_edge
+
+; CHECK: .split..split.split_crit_edge: ; preds = %.split
+; CHECK-NEXT: br label %.split.split
+
+; CHECK: .split.split.us: ; preds = %.split
+; CHECK-NEXT: br label %loop_begin.us1
+
+; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us6, %.split.split.us
+; CHECK-NEXT: %var_val.us2 = load i32* %var
+; CHECK-NEXT: switch i32 %c, label %second_switch.us4 [
+; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge.us
+; CHECK-NEXT: ]
+
+; CHECK: inc.us3: ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us4
+; CHECK-NEXT: call void @incf() noreturn nounwind
+; CHECK-NEXT: br label %loop_begin.backedge.us6
+
+; CHECK: second_switch.us4: ; preds = %loop_begin.us1
+; CHECK-NEXT: switch i32 1, label %default.us5 [
+; CHECK-NEXT: i32 1, label %inc.us3
+; CHECK-NEXT: ]
+
+; CHECK: loop_begin.inc_crit_edge.us: ; preds = %loop_begin.us1
+; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us3
+
+; CHECK: .split.split: ; preds = %.split..split.split_crit_edge
+; CHECK-NEXT: br label %loop_begin
+
+; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split
+; CHECK-NEXT: %var_val = load i32* %var
+; CHECK-NEXT: switch i32 %c, label %second_switch [
+; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge
+; CHECK-NEXT: ]
+
+; CHECK: loop_begin.inc_crit_edge: ; preds = %loop_begin
+; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa, label %inc
+
+; CHECK: second_switch: ; preds = %loop_begin
+; CHECK-NEXT: switch i32 %d, label %default [
+; CHECK-NEXT: i32 1, label %second_switch.inc_crit_edge
+; CHECK-NEXT: ]
+
+; CHECK: second_switch.inc_crit_edge: ; preds = %second_switch
+; CHECK-NEXT: br i1 true, label %us-unreachable7, label %inc
+
+
+define i32 @test(i32* %var) {
+ %mem = alloca i32
+ store i32 2, i32* %mem
+ %c = load i32* %mem
+ %d = load i32* %mem
+
+ br label %loop_begin
+
+loop_begin:
+
+ %var_val = load i32* %var
+
+ switch i32 %c, label %second_switch [
+ i32 1, label %inc
+ ]
+
+second_switch:
+ switch i32 %d, label %default [
+ i32 1, label %inc
+ ]
+
+inc:
+ call void @incf() noreturn nounwind
+ br label %loop_begin
+
+default:
+ br label %loop_begin
+
+loop_exit:
+ ret i32 0
+}
+
+declare void @incf() noreturn
+declare void @decf() noreturn