diff options
author | Evan Cheng <evan.cheng@apple.com> | 2008-12-19 18:03:11 +0000 |
---|---|---|
committer | Evan Cheng <evan.cheng@apple.com> | 2008-12-19 18:03:11 +0000 |
commit | ab63152871f4144050d0a58d592a95e089fe40d4 (patch) | |
tree | 0bb389d1de42d49c57d5daac76ef4f7ca0cf7378 | |
parent | a33649e98ce6512ed95a5e5f7b72dd28e243a289 (diff) |
- CodeGenPrepare does not split loop back edges but it only knows about back edges of single block loops. It now does a DFS walk to find loop back edges.
- Use SplitBlockPredecessors to factor out common predecessors of the critical edge destination. This is disabled for now due to some regressions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61248 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Scalar/CodeGenPrepare.cpp | 180 | ||||
-rw-r--r-- | test/CodeGen/X86/critical-edge-split.ll | 50 | ||||
-rw-r--r-- | test/CodeGen/X86/remat-mov0.ll | 2 |
3 files changed, 186 insertions, 46 deletions
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 7079fa828b..ff9d32c316 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" @@ -37,11 +38,18 @@ using namespace llvm; using namespace llvm::PatternMatch; +static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak", + cl::init(false), cl::Hidden); + namespace { class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// transformation profitability. const TargetLowering *TLI; + + /// BackEdges - Keep a set of all the loop back edges. + /// + SmallSet<std::pair<BasicBlock*,BasicBlock*>, 8> BackEdges; public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) @@ -58,6 +66,7 @@ namespace { bool OptimizeInlineAsmInst(Instruction *I, CallSite CS, DenseMap<Value*,Value*> &SunkAddrs); bool OptimizeExtUses(Instruction *I); + void findLoopBackEdges(Function &F); }; } @@ -69,10 +78,55 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { return new CodeGenPrepare(TLI); } +/// findLoopBackEdges - Do a DFS walk to find loop back edges. +/// +void CodeGenPrepare::findLoopBackEdges(Function &F) { + SmallPtrSet<BasicBlock*, 8> Visited; + SmallVector<std::pair<BasicBlock*, succ_iterator>, 8> VisitStack; + SmallPtrSet<BasicBlock*, 8> InStack; + + BasicBlock *BB = &F.getEntryBlock(); + if (succ_begin(BB) == succ_end(BB)) + return; + Visited.insert(BB); + VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); + InStack.insert(BB); + do { + std::pair<BasicBlock*, succ_iterator> &Top = VisitStack.back(); + BasicBlock *ParentBB = Top.first; + succ_iterator &I = Top.second; + + bool FoundNew = false; + while (I != succ_end(ParentBB)) { + BB = *I++; + if (Visited.insert(BB)) { + FoundNew = true; + break; + } + // Successor is in VisitStack, it's a back edge. + if (InStack.count(BB)) + BackEdges.insert(std::make_pair(ParentBB, BB)); + } + + if (FoundNew) { + // Go down one level if there is a unvisited successor. + InStack.insert(BB); + VisitStack.push_back(std::make_pair(BB, succ_begin(BB))); + } else { + // Go up one level. + std::pair<BasicBlock*, succ_iterator> &Pop = VisitStack.back(); + InStack.erase(Pop.first); + VisitStack.pop_back(); + } + } while (!VisitStack.empty()); +} + bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; + findLoopBackEdges(F); + // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); @@ -262,7 +316,9 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { /// phi nodes (otherwise critical edges are ok). If there is already another /// predecessor of the succ that is empty (and thus has no phi nodes), use it /// instead of introducing a new block. -static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, Pass *P) { +static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, + SmallSet<std::pair<BasicBlock*,BasicBlock*>, 8> &BackEdges, + Pass *P) { BasicBlock *TIBB = TI->getParent(); BasicBlock *Dest = TI->getSuccessor(SuccNum); assert(isa<PHINode>(Dest->begin()) && @@ -271,55 +327,90 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, Pass *P) { // As a hack, never split backedges of loops. Even though the copy for any // PHIs inserted on the backedge would be dead for exits from the loop, we // assume that the cost of *splitting* the backedge would be too high. - if (Dest == TIBB) + if (BackEdges.count(std::make_pair(TIBB, Dest))) return; - /// TIPHIValues - This array is lazily computed to determine the values of - /// PHIs in Dest that TI would provide. - SmallVector<Value*, 32> TIPHIValues; + if (!FactorCommonPreds) { + /// TIPHIValues - This array is lazily computed to determine the values of + /// PHIs in Dest that TI would provide. + SmallVector<Value*, 32> TIPHIValues; + + // Check to see if Dest has any blocks that can be used as a split edge for + // this terminator. + for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { + BasicBlock *Pred = *PI; + // To be usable, the pred has to end with an uncond branch to the dest. + BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); + if (!PredBr || !PredBr->isUnconditional() || + // Must be empty other than the branch. + &Pred->front() != PredBr || + // Cannot be the entry block; its label does not get emitted. + Pred == &(Dest->getParent()->getEntryBlock())) + continue; - // Check to see if Dest has any blocks that can be used as a split edge for - // this terminator. - for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { - BasicBlock *Pred = *PI; - // To be usable, the pred has to end with an uncond branch to the dest. - BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator()); - if (!PredBr || !PredBr->isUnconditional() || - // Must be empty other than the branch. - &Pred->front() != PredBr || - // Cannot be the entry block; its label does not get emitted. - Pred == &(Dest->getParent()->getEntryBlock())) - continue; + // Finally, since we know that Dest has phi nodes in it, we have to make + // sure that jumping to Pred will have the same affect as going to Dest in + // terms of PHI values. + PHINode *PN; + unsigned PHINo = 0; + bool FoundMatch = true; + for (BasicBlock::iterator I = Dest->begin(); + (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { + if (PHINo == TIPHIValues.size()) + TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); + + // If the PHI entry doesn't work, we can't use this pred. + if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { + FoundMatch = false; + break; + } + } - // Finally, since we know that Dest has phi nodes in it, we have to make - // sure that jumping to Pred will have the same affect as going to Dest in - // terms of PHI values. - PHINode *PN; - unsigned PHINo = 0; - bool FoundMatch = true; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) { - if (PHINo == TIPHIValues.size()) - TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); - - // If the PHI entry doesn't work, we can't use this pred. - if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { - FoundMatch = false; - break; + // If we found a workable predecessor, change TI to branch to Succ. + if (FoundMatch) { + Dest->removePredecessor(TIBB); + TI->setSuccessor(SuccNum, Pred); + return; } } - // If we found a workable predecessor, change TI to branch to Succ. - if (FoundMatch) { - Dest->removePredecessor(TIBB); - TI->setSuccessor(SuccNum, Pred); - return; + SplitCriticalEdge(TI, SuccNum, P, true); + return; + } + + PHINode *PN; + SmallVector<Value*, 8> TIPHIValues; + for (BasicBlock::iterator I = Dest->begin(); + (PN = dyn_cast<PHINode>(I)); ++I) + TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB)); + + SmallVector<BasicBlock*, 8> IdenticalPreds; + for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) { + BasicBlock *Pred = *PI; + if (BackEdges.count(std::make_pair(Pred, Dest))) + continue; + if (PI == TIBB) + IdenticalPreds.push_back(Pred); + else { + bool Identical = true; + unsigned PHINo = 0; + for (BasicBlock::iterator I = Dest->begin(); + (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) + if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) { + Identical = false; + break; + } + if (Identical) + IdenticalPreds.push_back(Pred); } } - SplitCriticalEdge(TI, SuccNum, P, true); + assert(!IdenticalPreds.empty()); + SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(), + ".critedge", P); } + /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop /// copy (e.g. it's casting from one pointer type to another, int->uint, or /// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual @@ -1350,17 +1441,16 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { bool MadeChange = false; - // Split all critical edges where the dest block has a PHI and where the phi - // has shared immediate operands. + // Split all critical edges where the dest block has a PHI. TerminatorInst *BBTI = BB.getTerminator(); if (BBTI->getNumSuccessors() > 1) { - for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) - if (isa<PHINode>(BBTI->getSuccessor(i)->begin()) && - isCriticalEdge(BBTI, i, true)) - SplitEdgeNicely(BBTI, i, this); + for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { + BasicBlock *SuccBB = BBTI->getSuccessor(i); + if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) + SplitEdgeNicely(BBTI, i, BackEdges, this); + } } - // Keep track of non-local addresses that have been sunk into this block. // This allows us to avoid inserting duplicate code for blocks with multiple // load/stores of the same address. diff --git a/test/CodeGen/X86/critical-edge-split.ll b/test/CodeGen/X86/critical-edge-split.ll new file mode 100644 index 0000000000..7b83ecbc5c --- /dev/null +++ b/test/CodeGen/X86/critical-edge-split.ll @@ -0,0 +1,50 @@ +; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -stats -info-output-file - | grep asm-printer | grep 31 + + %CC = type { %Register } + %II = type { %"struct.XX::II::$_74" } + %JITFunction = type %YYValue* (%CC*, %YYValue**) + %YYValue = type { i32 (...)** } + %Register = type { %"struct.XX::ByteCodeFeatures" } + %"struct.XX::ByteCodeFeatures" = type { i32 } + %"struct.XX::II::$_74" = type { i8* } +@llvm.used = appending global [1 x i8*] [ i8* bitcast (%JITFunction* @loop to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] + +define %YYValue* @loop(%CC*, %YYValue**) nounwind { +; <label>:2 + %3 = getelementptr %CC* %0, i32 -9 ; <%CC*> [#uses=1] + %4 = bitcast %CC* %3 to %YYValue** ; <%YYValue**> [#uses=2] + %5 = load %YYValue** %4 ; <%YYValue*> [#uses=3] + %unique_1.i = ptrtoint %YYValue* %5 to i1 ; <i1> [#uses=1] + br i1 %unique_1.i, label %loop, label %11 + +loop: ; preds = %6, %2 + %.1 = phi %YYValue* [ inttoptr (i32 1 to %YYValue*), %2 ], [ %intAddValue, %6 ] ; <%YYValue*> [#uses=3] + %immediateCmp = icmp slt %YYValue* %.1, %5 ; <i1> [#uses=1] + br i1 %immediateCmp, label %6, label %8 + +; <label>:6 ; preds = %loop + %lhsInt = ptrtoint %YYValue* %.1 to i32 ; <i32> [#uses=1] + %7 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhsInt, i32 2) ; <{ i32, i1 }> [#uses=2] + %intAdd = extractvalue { i32, i1 } %7, 0 ; <i32> [#uses=1] + %intAddValue = inttoptr i32 %intAdd to %YYValue* ; <%YYValue*> [#uses=1] + %intAddOverflow = extractvalue { i32, i1 } %7, 1 ; <i1> [#uses=1] + br i1 %intAddOverflow, label %.loopexit, label %loop + +; <label>:8 ; preds = %loop + ret %YYValue* inttoptr (i32 10 to %YYValue*) + +.loopexit: ; preds = %6 + %9 = bitcast %CC* %0 to %YYValue** ; <%YYValue**> [#uses=1] + store %YYValue* %.1, %YYValue** %9 + store %YYValue* %5, %YYValue** %4 + %10 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431104 to %II*), %CC* %0, %YYValue** %1) ; <%YYValue*> [#uses=1] + ret %YYValue* %10 + +; <label>:11 ; preds = %2 + %12 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431080 to %II*), %CC* %0, %YYValue** %1) ; <%YYValue*> [#uses=1] + ret %YYValue* %12 +} + +declare fastcc %YYValue* @foobar(%II*, %CC*, %YYValue**) nounwind + +declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind diff --git a/test/CodeGen/X86/remat-mov0.ll b/test/CodeGen/X86/remat-mov0.ll index 360628cb6a..a50c8f3fa9 100644 --- a/test/CodeGen/X86/remat-mov0.ll +++ b/test/CodeGen/X86/remat-mov0.ll @@ -1,4 +1,4 @@ -; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 2 +; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 1 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } %struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* } |