diff options
4 files changed, 395 insertions, 11 deletions
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 301791a90d..a2d0e98cd3 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -71,7 +71,9 @@ namespace { // LoopProcessWorklist - Used to check if second loop needs processing // after RewriteLoopBodyWithConditionConstant rewrites first loop. std::vector<Loop*> LoopProcessWorklist; - SmallPtrSet<Value *,8> UnswitchedVals; + + // FIXME: Consider custom class for this. + std::map<const SwitchInst*, SmallPtrSet<const Value *,8> > UnswitchedVals; bool OptimizeForSize; bool redoLoop; @@ -117,7 +119,15 @@ namespace { private: virtual void releaseMemory() { - UnswitchedVals.clear(); + // We need to forget about all switches in the current loop. + // FIXME: Do it better than enumerating all blocks of code + // and see if it is a switch instruction. + for (Loop::block_iterator I = currentLoop->block_begin(), + E = currentLoop->block_end(); I != E; ++I) { + SwitchInst* SI = dyn_cast<SwitchInst>((*I)->getTerminator()); + if (SI) + UnswitchedVals.erase(SI); + } } /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist, @@ -128,6 +138,12 @@ namespace { if (I != LoopProcessWorklist.end()) LoopProcessWorklist.erase(I); } + + /// For new loop switches we clone info about values that was + /// already unswitched and has redundant successors. + /// Note, that new loop data is stored inside the VMap. + void CloneUnswitchedVals(const ValueToValueMapTy& VMap, + const BasicBlock* SrcBB); void initLoopData() { loopHeader = currentLoop->getHeader(); @@ -255,13 +271,25 @@ bool LoopUnswitch::processCurrentLoop() { } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), currentLoop, Changed); - if (LoopCond && SI->getNumCases() > 1) { + unsigned NumCases = SI->getNumCases(); + if (LoopCond && NumCases > 1) { // Find a value to unswitch on: // FIXME: this should chose the most expensive case! // FIXME: scan for a case with a non-critical edge? - Constant *UnswitchVal = SI->getCaseValue(1); + Constant *UnswitchVal = NULL; + // Do not process same value again and again. - if (!UnswitchedVals.insert(UnswitchVal)) + // At this point we have some cases already unswitched and + // some not yet unswitched. Let's find the first not yet unswitched one. + for (unsigned i = 1; i < NumCases; ++i) { + Constant* UnswitchValCandidate = SI->getCaseValue(i); + if (!UnswitchedVals[SI].count(UnswitchValCandidate)) { + UnswitchVal = UnswitchValCandidate; + break; + } + } + + if (!UnswitchVal) continue; if (UnswitchIfProfitable(LoopCond, UnswitchVal)) { @@ -287,6 +315,23 @@ bool LoopUnswitch::processCurrentLoop() { return Changed; } +/// For new loop switches we clone info about values that was +/// already unswitched and has redundant successors. +/// Not that new loop data is stored inside the VMap. +void LoopUnswitch::CloneUnswitchedVals(const ValueToValueMapTy& VMap, + const BasicBlock* SrcBB) { + + const SwitchInst* SI = dyn_cast<SwitchInst>(SrcBB->getTerminator()); + if (SI && UnswitchedVals.count(SI)) { + // Don't clone a totally simplified switch. + if (isa<Constant>(SI->getCondition())) + return; + Value* I = VMap.lookup(SI); + assert(I && "All instructions that are in SrcBB must be in VMap."); + UnswitchedVals[cast<SwitchInst>(I)] = UnswitchedVals[SI]; + } +} + /// isTrivialLoopExitBlock - Check to see if all paths from BB exit the /// loop with no side effects (including infinite loops). /// @@ -378,14 +423,25 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, // Check to see if a successor of the switch is guaranteed to go to the // latch block or exit through a one exit block without having any // side-effects. If so, determine the value of Cond that causes it to do - // this. Note that we can't trivially unswitch on the default case. - for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) - if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, + // this. + // Note that we can't trivially unswitch on the default case or + // on already unswitched cases. + for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) { + BasicBlock* LoopExitCandidate; + if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop, SI->getSuccessor(i)))) { // Okay, we found a trivial case, remember the value that is trivial. - if (Val) *Val = SI->getCaseValue(i); + ConstantInt* CaseVal = SI->getCaseValue(i); + + // Check that it was not unswitched before, since already unswitched + // trivial vals are looks trivial too. + if (UnswitchedVals[SI].count(CaseVal)) + continue; + LoopExitBB = LoopExitCandidate; + if (Val) *Val = CaseVal; break; } + } } // If we didn't find a single unique LoopExit block, or if the loop exit block @@ -447,8 +503,14 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { // expansion, and the number of basic blocks, to avoid loops with // large numbers of branches which cause loop unswitching to go crazy. // This is a very ad-hoc heuristic. - if (Metrics.NumInsts > Threshold || - Metrics.NumBlocks * 5 > Threshold || + + unsigned NumUnswitched = + (NumSwitches + NumBranches) + 1 /*take in account current iteration*/; + + unsigned NumInsts = Metrics.NumInsts * NumUnswitched; + unsigned NumBlocks = Metrics.NumBlocks * NumUnswitched; + + if (NumInsts > Threshold || NumBlocks * 5 > Threshold || Metrics.containsIndirectBr || Metrics.isRecursive) { DEBUG(dbgs() << "NOT unswitching loop %" << currentLoop->getHeader()->getName() << ", cost too high: " @@ -620,6 +682,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, ValueToValueMapTy VMap; for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F); + + // Inherit simplified switches info for NewBB + // We needn't pass NewBB since its instructions are already contained + // inside the VMap. + CloneUnswitchedVals(VMap, LoopBlocks[i]); + NewBlocks.push_back(NewBB); VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); @@ -945,6 +1013,9 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, BasicBlock *Switch = SI->getParent(); BasicBlock *SISucc = SI->getSuccessor(DeadCase); BasicBlock *Latch = L->getLoopLatch(); + + UnswitchedVals[SI].insert(Val); + if (!SI->findCaseDest(SISucc)) continue; // Edge is critical. // If the DeadCase successor dominates the loop latch, then the // transformation isn't safe since it will delete the sole predecessor edge diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll new file mode 100644 index 0000000000..8389fe4643 --- /dev/null +++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll @@ -0,0 +1,91 @@ +; RUN: opt -loop-unswitch -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s +; RUN: opt -S -loop-unswitch -verify-loop-info -verify-dom-info %s | FileCheck %s + +; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted +; STATS: 2 loop-unswitch - Number of switches unswitched + +; CHECK: %1 = icmp eq i32 %c, 1 +; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge + +; CHECK: ..split_crit_edge: ; preds = %0 +; CHECK-NEXT: br label %.split + +; CHECK: .split.us: ; preds = %0 +; CHECK-NEXT: br label %loop_begin.us + +; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us +; CHECK-NEXT: %var_val.us = load i32* %var +; CHECK-NEXT: switch i32 1, label %default.us-lcssa.us [ +; CHECK-NEXT: i32 1, label %inc.us + +; CHECK: inc.us: ; preds = %loop_begin.us +; CHECK-NEXT: call void @incf() noreturn nounwind +; CHECK-NEXT: br label %loop_begin.backedge.us + +; CHECK: .split: ; preds = %..split_crit_edge +; CHECK-NEXT: %2 = icmp eq i32 %c, 2 +; CHECK-NEXT: br i1 %2, label %.split.split.us, label %.split..split.split_crit_edge + +; CHECK: .split..split.split_crit_edge: ; preds = %.split +; CHECK-NEXT: br label %.split.split + +; CHECK: .split.split.us: ; preds = %.split +; CHECK-NEXT: br label %loop_begin.us1 + +; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us5, %.split.split.us +; CHECK-NEXT: %var_val.us2 = load i32* %var +; CHECK-NEXT: switch i32 2, label %default.us-lcssa.us-lcssa.us [ +; CHECK-NEXT: i32 1, label %inc.us3 +; CHECK-NEXT: i32 2, label %dec.us4 +; CHECK-NEXT: ] + +; CHECK: dec.us4: ; preds = %loop_begin.us1 +; CHECK-NEXT: call void @decf() noreturn nounwind +; CHECK-NEXT: br label %loop_begin.backedge.us5 + +; CHECK: .split.split: ; preds = %.split..split.split_crit_edge +; CHECK-NEXT: br label %loop_begin + +; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split +; CHECK-NEXT: %var_val = load i32* %var +; CHECK-NEXT: switch i32 %c, label %default.us-lcssa.us-lcssa [ +; CHECK-NEXT: i32 1, label %inc +; CHECK-NEXT: i32 2, label %dec +; CHECK-NEXT: ] + +; CHECK: inc: ; preds = %loop_begin +; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa, label %inc.split + +; CHECK: dec: ; preds = %loop_begin +; CHECK-NEXT: br i1 true, label %us-unreachable6, label %dec.split + +define i32 @test(i32* %var) { + %mem = alloca i32 + store i32 2, i32* %mem + %c = load i32* %mem + + br label %loop_begin + +loop_begin: + + %var_val = load i32* %var + + switch i32 %c, label %default [ + i32 1, label %inc + i32 2, label %dec + ] + +inc: + call void @incf() noreturn nounwind + br label %loop_begin +dec: + call void @decf() noreturn nounwind + br label %loop_begin +default: + br label %loop_exit +loop_exit: + ret i32 0 +} + +declare void @incf() noreturn +declare void @decf() noreturn diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll new file mode 100644 index 0000000000..21c0ec3e22 --- /dev/null +++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll @@ -0,0 +1,84 @@ +; RUN: opt -loop-unswitch -loop-unswitch-threshold 30 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s +; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 30 -verify-loop-info -verify-dom-info %s | FileCheck %s + +; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted +; STATS: 1 loop-unswitch - Number of switches unswitched + +; ModuleID = '../llvm/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll' + +; CHECK: %1 = icmp eq i32 %c, 1 +; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge + +; CHECK: ..split_crit_edge: ; preds = %0 +; CHECK-NEXT: br label %.split + +; CHECK: .split.us: ; preds = %0 +; CHECK-NEXT: br label %loop_begin.us + +; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us +; CHECK: switch i32 1, label %second_switch.us [ +; CHECK-NEXT: i32 1, label %inc.us + +; CHECK: inc.us: ; preds = %second_switch.us, %loop_begin.us +; CHECK-NEXT: call void @incf() noreturn nounwind +; CHECK-NEXT: br label %loop_begin.backedge.us + +; CHECK: second_switch.us: ; preds = %loop_begin.us +; CHECK-NEXT: switch i32 %d, label %default.us [ +; CHECK-NEXT: i32 1, label %inc.us +; CHECK-NEXT: ] + +; CHECK: .split: ; preds = %..split_crit_edge +; CHECK-NEXT: br label %loop_begin + +; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split +; CHECK: switch i32 %c, label %second_switch [ +; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge +; CHECK-NEXT: ] + +; CHECK: loop_begin.inc_crit_edge: ; preds = %loop_begin +; CHECK-NEXT: br i1 true, label %us-unreachable, label %inc + +; CHECK: second_switch: ; preds = %loop_begin +; CHECK-NEXT: switch i32 %d, label %default [ +; CHECK-NEXT: i32 1, label %inc +; CHECK-NEXT: ] + +; CHECK: inc: ; preds = %loop_begin.inc_crit_edge, %second_switch +; CHECK-NEXT: call void @incf() noreturn nounwind +; CHECK-NEXT: br label %loop_begin.backedge + +define i32 @test(i32* %var) { + %mem = alloca i32 + store i32 2, i32* %mem + %c = load i32* %mem + %d = load i32* %mem + + br label %loop_begin + +loop_begin: + + %var_val = load i32* %var + + switch i32 %c, label %second_switch [ + i32 1, label %inc + ] + +second_switch: + switch i32 %d, label %default [ + i32 1, label %inc + ] + +inc: + call void @incf() noreturn nounwind + br label %loop_begin + +default: + br label %loop_begin + +loop_exit: + ret i32 0 +} + +declare void @incf() noreturn +declare void @decf() noreturn diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll new file mode 100644 index 0000000000..1b186d6bec --- /dev/null +++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll @@ -0,0 +1,138 @@ +; RUN: opt -loop-unswitch -loop-unswitch-threshold 1000 -disable-output -stats -info-output-file - < %s | FileCheck --check-prefix=STATS %s +; RUN: opt -S -loop-unswitch -loop-unswitch-threshold 1000 -verify-loop-info -verify-dom-info %s | FileCheck %s + +; STATS: 1 loop-simplify - Number of pre-header or exit blocks inserted +; STATS: 3 loop-unswitch - Number of switches unswitched + +; CHECK: %1 = icmp eq i32 %c, 1 +; CHECK-NEXT: br i1 %1, label %.split.us, label %..split_crit_edge + +; CHECK: ..split_crit_edge: ; preds = %0 +; CHECK-NEXT: br label %.split + +; CHECK: .split.us: ; preds = %0 +; CHECK-NEXT: %2 = icmp eq i32 %d, 1 +; CHECK-NEXT: br i1 %2, label %.split.us.split.us, label %.split.us..split.us.split_crit_edge + +; CHECK: .split.us..split.us.split_crit_edge: ; preds = %.split.us +; CHECK-NEXT: br label %.split.us.split + +; CHECK: .split.us.split.us: ; preds = %.split.us +; CHECK-NEXT: br label %loop_begin.us.us + +; CHECK: loop_begin.us.us: ; preds = %loop_begin.backedge.us.us, %.split.us.split.us +; CHECK-NEXT: %var_val.us.us = load i32* %var +; CHECK-NEXT: switch i32 1, label %second_switch.us.us [ +; CHECK-NEXT: i32 1, label %inc.us.us + +; CHECK: inc.us.us: ; preds = %second_switch.us.us, %loop_begin.us.us +; CHECK-NEXT: call void @incf() noreturn nounwind +; CHECK-NEXT: br label %loop_begin.backedge.us.us + +; CHECK: second_switch.us.us: ; preds = %loop_begin.us.us +; CHECK-NEXT: switch i32 1, label %default.us.us [ +; CHECK-NEXT: i32 1, label %inc.us.us + +; CHECK: .split.us.split: ; preds = %.split.us..split.us.split_crit_edge +; CHECK-NEXT: br label %loop_begin.us + +; CHECK: loop_begin.us: ; preds = %loop_begin.backedge.us, %.split.us.split +; CHECK-NEXT: %var_val.us = load i32* %var +; CHECK-NEXT: switch i32 1, label %second_switch.us [ +; CHECK-NEXT: i32 1, label %inc.us + +; CHECK: inc.us: ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us +; CHECK-NEXT: call void @incf() noreturn nounwind +; CHECK-NEXT: br label %loop_begin.backedge.us + +; CHECK: second_switch.us: ; preds = %loop_begin.us +; CHECK-NEXT: switch i32 %d, label %default.us [ +; CHECK-NEXT: i32 1, label %second_switch.us.inc.us_crit_edge +; CHECK-NEXT: ] + +; CHECK: second_switch.us.inc.us_crit_edge: ; preds = %second_switch.us +; CHECK-NEXT: br i1 true, label %us-unreachable8, label %inc.us + +; CHECK: .split: ; preds = %..split_crit_edge +; CHECK-NEXT: %3 = icmp eq i32 %d, 1 +; CHECK-NEXT: br i1 %3, label %.split.split.us, label %.split..split.split_crit_edge + +; CHECK: .split..split.split_crit_edge: ; preds = %.split +; CHECK-NEXT: br label %.split.split + +; CHECK: .split.split.us: ; preds = %.split +; CHECK-NEXT: br label %loop_begin.us1 + +; CHECK: loop_begin.us1: ; preds = %loop_begin.backedge.us6, %.split.split.us +; CHECK-NEXT: %var_val.us2 = load i32* %var +; CHECK-NEXT: switch i32 %c, label %second_switch.us4 [ +; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge.us +; CHECK-NEXT: ] + +; CHECK: inc.us3: ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us4 +; CHECK-NEXT: call void @incf() noreturn nounwind +; CHECK-NEXT: br label %loop_begin.backedge.us6 + +; CHECK: second_switch.us4: ; preds = %loop_begin.us1 +; CHECK-NEXT: switch i32 1, label %default.us5 [ +; CHECK-NEXT: i32 1, label %inc.us3 +; CHECK-NEXT: ] + +; CHECK: loop_begin.inc_crit_edge.us: ; preds = %loop_begin.us1 +; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us3 + +; CHECK: .split.split: ; preds = %.split..split.split_crit_edge +; CHECK-NEXT: br label %loop_begin + +; CHECK: loop_begin: ; preds = %loop_begin.backedge, %.split.split +; CHECK-NEXT: %var_val = load i32* %var +; CHECK-NEXT: switch i32 %c, label %second_switch [ +; CHECK-NEXT: i32 1, label %loop_begin.inc_crit_edge +; CHECK-NEXT: ] + +; CHECK: loop_begin.inc_crit_edge: ; preds = %loop_begin +; CHECK-NEXT: br i1 true, label %us-unreachable.us-lcssa, label %inc + +; CHECK: second_switch: ; preds = %loop_begin +; CHECK-NEXT: switch i32 %d, label %default [ +; CHECK-NEXT: i32 1, label %second_switch.inc_crit_edge +; CHECK-NEXT: ] + +; CHECK: second_switch.inc_crit_edge: ; preds = %second_switch +; CHECK-NEXT: br i1 true, label %us-unreachable7, label %inc + + +define i32 @test(i32* %var) { + %mem = alloca i32 + store i32 2, i32* %mem + %c = load i32* %mem + %d = load i32* %mem + + br label %loop_begin + +loop_begin: + + %var_val = load i32* %var + + switch i32 %c, label %second_switch [ + i32 1, label %inc + ] + +second_switch: + switch i32 %d, label %default [ + i32 1, label %inc + ] + +inc: + call void @incf() noreturn nounwind + br label %loop_begin + +default: + br label %loop_begin + +loop_exit: + ret i32 0 +} + +declare void @incf() noreturn +declare void @decf() noreturn |