R600: New control flow for SI v2

This patch replaces the control flow handling with a new pass which structurize the graph before transforming it to machine instruction. This has a couple of different advantages and currently fixes 20 piglit tests without a single regression. It is now a general purpose transformation that could be not only be used for SI/R6xx, but also for other hardware implementations that use a form of structurized control flow. v2: further cleanup, fixes and documentation Patch by: Christian König Signed-off-by: Christian König <deathsimple@vodafone.de> Reviewed-by: Tom Stellard <thomas.stellard@amd.com> Tested-by: Michel Dänzer <michel.daenzer@amd.com> git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170591 91177308-0d34-0410-b5e6-96231b3b80d8
author: Tom Stellard <thomas.stellard@amd.com> 2012-12-19 22:10:31 +0000
committer: Tom Stellard <thomas.stellard@amd.com> 2012-12-19 22:10:31 +0000
commit: 6b7d99d47321ebb478b22afd2e317fe89d2149db (patch)
tree: 548126753462b8985bf846b022d39b2c51e96f96 /lib
parent: 6eebe47060eec7e3a4ae95d4b4835869108f9c07 (diff)
14 files changed, 1500 insertions, 385 deletions
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 40864b09dd..0f5125d39b 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -25,13 +25,14 @@ FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
 FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
 
 // SI Passes
+FunctionPass *createSIAnnotateControlFlowPass();
 FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
 FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
 FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
 FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
-FunctionPass *createSIFixSGPRLivenessPass(TargetMachine &tm);
 
 // Passes common to R600 and SI
+Pass *createAMDGPUStructurizeCFGPass();
 FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
 
 } // End namespace llvm
diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
new file mode 100644
index 0000000000..686c3dccd3
--- /dev/null
+++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
@@ -0,0 +1,732 @@
+//===-- AMDGPUStructurizeCFG.cpp -  ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// The pass implemented in this file transforms the programs control flow
+/// graph into a form that's suitable for code generation on hardware that
+/// implements control flow by execution masking. This currently includes all
+/// AMD GPUs but may as well be useful for other types of hardware.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+namespace {
+
+// Definition of the complex types used in this pass.
+
+typedef std::pair<BasicBlock *, Value *> BBValuePair;
+typedef ArrayRef<BasicBlock*> BBVecRef;
+
+typedef SmallVector<RegionNode*, 8> RNVector;
+typedef SmallVector<BasicBlock*, 8> BBVector;
+typedef SmallVector<BBValuePair, 2> BBValueVector;
+
+typedef DenseMap<PHINode *, BBValueVector> PhiMap;
+typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
+typedef DenseMap<BasicBlock *, Value *> BBPredicates;
+typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
+typedef DenseMap<BasicBlock *, unsigned> VisitedMap;
+
+// The name for newly created blocks.
+
+static const char *FlowBlockName = "Flow";
+
+/// @brief Transforms the control flow graph on one single entry/exit region
+/// at a time.
+///
+/// After the transform all "If"/"Then"/"Else" style control flow looks like
+/// this:
+///
+/// \verbatim
+/// 1
+/// ||
+/// | |
+/// 2 |
+/// | /
+/// |/   
+/// 3
+/// ||   Where:
+/// | |  1 = "If" block, calculates the condition
+/// 4 |  2 = "Then" subregion, runs if the condition is true
+/// | /  3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
+/// |/   4 = "Else" optional subregion, runs if the condition is false
+/// 5    5 = "End" block, also rejoins the control flow
+/// \endverbatim
+///
+/// Control flow is expressed as a branch where the true exit goes into the
+/// "Then"/"Else" region, while the false exit skips the region
+/// The condition for the optional "Else" region is expressed as a PHI node.
+/// The incomming values of the PHI node are true for the "If" edge and false
+/// for the "Then" edge.
+///
+/// Additionally to that even complicated loops look like this:
+///
+/// \verbatim
+/// 1
+/// ||
+/// | |
+/// 2 ^  Where:
+/// | /  1 = "Entry" block
+/// |/   2 = "Loop" optional subregion, with all exits at "Flow" block
+/// 3    3 = "Flow" block, with back edge to entry block
+/// |
+/// \endverbatim
+///
+/// The back edge of the "Flow" block is always on the false side of the branch
+/// while the true side continues the general flow. So the loop condition
+/// consist of a network of PHI nodes where the true incoming values expresses
+/// breaks and the false values expresses continue states.
+class AMDGPUStructurizeCFG : public RegionPass {
+
+  static char ID;
+
+  Type *Boolean;
+  ConstantInt *BoolTrue;
+  ConstantInt *BoolFalse;
+  UndefValue *BoolUndef;
+
+  Function *Func;
+  Region *ParentRegion;
+
+  DominatorTree *DT;
+
+  RNVector Order;
+  VisitedMap Visited;
+  PredMap Predicates;
+  BBPhiMap DeletedPhis;
+  BBVector FlowsInserted;
+
+  BasicBlock *LoopStart;
+  BasicBlock *LoopEnd;
+  BBPredicates LoopPred;
+
+  void orderNodes();
+
+  void buildPredicate(BranchInst *Term, unsigned Idx,
+                      BBPredicates &Pred, bool Invert);
+
+  void analyzeBlock(BasicBlock *BB);
+
+  void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx);
+
+  void collectInfos();
+
+  bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
+
+  void killTerminator(BasicBlock *BB);
+
+  RegionNode *skipChained(RegionNode *Node);
+
+  void delPhiValues(BasicBlock *From, BasicBlock *To);
+
+  void addPhiValues(BasicBlock *From, BasicBlock *To);
+
+  BasicBlock *getNextFlow(BasicBlock *Prev);
+
+  bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
+
+  BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node);
+
+  void createFlow();
+
+  void insertConditions();
+
+  void rebuildSSA();
+
+public:
+  AMDGPUStructurizeCFG():
+    RegionPass(ID) {
+
+    initializeRegionInfoPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual bool doInitialization(Region *R, RGPassManager &RGM);
+
+  virtual bool runOnRegion(Region *R, RGPassManager &RGM);
+
+  virtual const char *getPassName() const {
+    return "AMDGPU simplify control flow";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const {
+
+    AU.addRequired<DominatorTree>();
+    AU.addPreserved<DominatorTree>();
+    RegionPass::getAnalysisUsage(AU);
+  }
+
+};
+
+} // end anonymous namespace
+
+char AMDGPUStructurizeCFG::ID = 0;
+
+/// \brief Initialize the types and constants used in the pass
+bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
+
+  LLVMContext &Context = R->getEntry()->getContext();
+
+  Boolean = Type::getInt1Ty(Context);
+  BoolTrue = ConstantInt::getTrue(Context);
+  BoolFalse = ConstantInt::getFalse(Context);
+  BoolUndef = UndefValue::get(Boolean);
+
+  return false;
+}
+
+/// \brief Build up the general order of nodes
+void AMDGPUStructurizeCFG::orderNodes() {
+
+  scc_iterator<Region *> I = scc_begin(ParentRegion),
+                         E = scc_end(ParentRegion);
+  for (Order.clear(); I != E; ++I) {
+    std::vector<RegionNode *> &Nodes = *I;
+    Order.append(Nodes.begin(), Nodes.end());
+  }
+}
+
+/// \brief Build blocks and loop predicates
+void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx,
+                                          BBPredicates &Pred, bool Invert) {
+
+  Value *True = Invert ? BoolFalse : BoolTrue;
+  Value *False = Invert ? BoolTrue : BoolFalse;
+
+  RegionInfo *RI = ParentRegion->getRegionInfo();
+  BasicBlock *BB = Term->getParent();
+
+  // Handle the case where multiple regions start at the same block
+  Region *R = BB != ParentRegion->getEntry() ?
+              RI->getRegionFor(BB) : ParentRegion;
+
+  if (R == ParentRegion) {
+    // It's a top level block in our region
+    Value *Cond = True;
+    if (Term->isConditional()) {
+      BasicBlock *Other = Term->getSuccessor(!Idx);
+
+      if (Visited.count(Other)) {
+        if (!Pred.count(Other))
+          Pred[Other] = False;
+
+        if (!Pred.count(BB))
+          Pred[BB] = True;
+        return;
+      }
+      Cond = Term->getCondition();
+
+      if (Idx != Invert)
+        Cond = BinaryOperator::CreateNot(Cond, "", Term);
+    }
+
+    Pred[BB] = Cond;
+
+  } else if (ParentRegion->contains(R)) {
+    // It's a block in a sub region
+    while(R->getParent() != ParentRegion)
+      R = R->getParent();
+
+    Pred[R->getEntry()] = True;
+
+  } else {
+    // It's a branch from outside into our parent region
+    Pred[BB] = True;
+  }
+}
+
+/// \brief Analyze the successors of each block and build up predicates
+void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
+
+  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+  BBPredicates &Pred = Predicates[BB];
+
+  for (; PI != PE; ++PI) {
+    BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
+
+    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+      BasicBlock *Succ = Term->getSuccessor(i);
+      if (Succ != BB)
+        continue;
+      buildPredicate(Term, i, Pred, false);
+    }
+  }
+}
+
+/// \brief Analyze the conditions leading to loop to a previous block
+void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) {
+
+  BranchInst *Term = cast<BranchInst>(BB->getTerminator());
+
+  for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+    BasicBlock *Succ = Term->getSuccessor(i);
+
+    // Ignore it if it's not a back edge
+    if (!Visited.count(Succ))
+      continue;
+
+    buildPredicate(Term, i, LoopPred, true);
+
+    LoopEnd = BB;
+    if (Visited[Succ] < LoopIdx) {
+      LoopIdx = Visited[Succ];
+      LoopStart = Succ;
+    }
+  }
+}
+
+/// \brief Collect various loop and predicate infos
+void AMDGPUStructurizeCFG::collectInfos() {
+
+  unsigned Number = 0, LoopIdx = ~0;
+
+  // Reset predicate
+  Predicates.clear();
+
+  // and loop infos
+  LoopStart = LoopEnd = 0;
+  LoopPred.clear();
+
+  RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
+  for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) {
+
+    // Analyze all the conditions leading to a node
+    analyzeBlock((*OI)->getEntry());
+
+    if ((*OI)->isSubRegion())
+      continue;
+
+    // Find the first/last loop nodes and loop predicates
+    analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx);
+  }
+}
+
+/// \brief Does A dominate all the predicates of B ?
+bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
+
+  BBPredicates &Preds = Predicates[B];
+  for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+       PI != PE; ++PI) {
+
+    if (!DT->dominates(A, PI->first))
+      return false;
+  }
+  return true;
+}
+
+/// \brief Remove phi values from all successors and the remove the terminator.
+void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
+
+  TerminatorInst *Term = BB->getTerminator();
+  if (!Term)
+    return;
+
+  for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
+       SI != SE; ++SI) {
+
+    delPhiValues(BB, *SI);
+  }
+
+  Term->eraseFromParent();
+}
+
+/// First: Skip forward to the first region node that either isn't a subregion or not
+/// dominating it's exit, remove all the skipped nodes from the node order.
+///
+/// Second: Handle the first successor directly if the resulting nodes successor
+/// predicates are still dominated by the original entry
+RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
+
+  BasicBlock *Entry = Node->getEntry();
+
+  // Skip forward as long as it is just a linear flow
+  while (true) {
+    BasicBlock *Entry = Node->getEntry();
+    BasicBlock *Exit;
+
+    if (Node->isSubRegion()) {
+      Exit = Node->getNodeAs<Region>()->getExit();
+    } else {
+      TerminatorInst *Term = Entry->getTerminator();
+      if (Term->getNumSuccessors() != 1)
+        break;
+      Exit = Term->getSuccessor(0);
+    }
+
+    // It's a back edge, break here so we can insert a loop node
+    if (!Visited.count(Exit))
+      return Node;
+
+    // More than node edges are pointing to exit
+    if (!DT->dominates(Entry, Exit))
+      return Node;
+
+    RegionNode *Next = ParentRegion->getNode(Exit);
+    RNVector::iterator I = std::find(Order.begin(), Order.end(), Next);
+    assert(I != Order.end());
+
+    Visited.erase(Next->getEntry());
+    Order.erase(I);
+    Node = Next;
+  }
+
+  BasicBlock *BB = Node->getEntry();
+  TerminatorInst *Term = BB->getTerminator();
+  if (Term->getNumSuccessors() != 2)
+    return Node;
+
+  // Our node has exactly two succesors, check if we can handle
+  // any of them directly
+  BasicBlock *Succ = Term->getSuccessor(0);
+  if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) {
+    Succ = Term->getSuccessor(1);
+    if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ))
+      return Node;
+  } else {
+    BasicBlock *Succ2 = Term->getSuccessor(1);
+    if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] &&
+        dominatesPredicates(Entry, Succ2))
+      Succ = Succ2;
+  }
+
+  RegionNode *Next = ParentRegion->getNode(Succ);
+  RNVector::iterator E = Order.end();
+  RNVector::iterator I = std::find(Order.begin(), E, Next);
+  assert(I != E);
+
+  killTerminator(BB);
+  FlowsInserted.push_back(BB);
+  Visited.erase(Succ);
+  Order.erase(I);
+  return ParentRegion->getNode(wireFlowBlock(BB, Next));
+}
+
+/// \brief Remove all PHI values coming from "From" into "To" and remember
+/// them in DeletedPhis
+void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
+
+  PhiMap &Map = DeletedPhis[To];
+  for (BasicBlock::iterator I = To->begin(), E = To->end();
+       I != E && isa<PHINode>(*I);) {
+
+    PHINode &Phi = cast<PHINode>(*I++);
+    while (Phi.getBasicBlockIndex(From) != -1) {
+      Value *Deleted = Phi.removeIncomingValue(From, false);
+      Map[&Phi].push_back(std::make_pair(From, Deleted));
+    }
+  }
+}
+
+/// \brief Add the PHI values back once we knew the new predecessor
+void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
+
+  if (!DeletedPhis.count(To))
+    return;
+
+  PhiMap &Map = DeletedPhis[To];
+  SSAUpdater Updater;
+
+  for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
+
+    PHINode *Phi = I->first;
+    Updater.Initialize(Phi->getType(), "");
+    BasicBlock *Fallback = To;
+    bool HaveFallback = false;
+
+    for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
+         VI != VE; ++VI) {
+
+      Updater.AddAvailableValue(VI->first, VI->second);
+      BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
+      if (Dom == VI->first)
+        HaveFallback = true;
+      else if (Dom != Fallback)
+        HaveFallback = false;
+      Fallback = Dom;
+    }
+    if (!HaveFallback) {
+      Value *Undef = UndefValue::get(Phi->getType());
+      Updater.AddAvailableValue(Fallback, Undef);
+    }
+
+    Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
+  }
+  DeletedPhis.erase(To);
+}
+
+/// \brief Create a new flow node and update dominator tree and region info
+BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
+
+  LLVMContext &Context = Func->getContext();
+  BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
+                       Order.back()->getEntry();
+  BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
+                                        Func, Insert);
+  DT->addNewBlock(Flow, Prev);
+  ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
+  FlowsInserted.push_back(Flow);
+  return Flow;
+}
+
+/// \brief Can we predict that this node will always be called?
+bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev,
+                                             BasicBlock *Node) {
+
+  BBPredicates &Preds = Predicates[Node];
+  bool Dominated = false;
+
+  for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
+       I != E; ++I) {
+
+    if (I->second != BoolTrue)
+      return false;
+
+    if (!Dominated && DT->dominates(I->first, Prev))
+      Dominated = true;
+  }
+  return Dominated;
+}
+
+/// \brief Wire up the new control flow by inserting or updating the branch
+/// instructions at node exits
+BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
+                                                RegionNode *Node) {
+
+  BasicBlock *Entry = Node->getEntry();
+
+  if (LoopStart == Entry) {
+    LoopStart = Prev;
+    LoopPred[Prev] = BoolTrue;
+  }
+
+  // Wire it up temporary, skipChained may recurse into us
+  BranchInst::Create(Entry, Prev);
+  DT->changeImmediateDominator(Entry, Prev);
+  addPhiValues(Prev, Entry);
+
+  Node = skipChained(Node);
+
+  BasicBlock *Next = getNextFlow(Prev);
+  if (!isPredictableTrue(Prev, Entry)) {
+    // Let Prev point to entry and next block
+    Prev->getTerminator()->eraseFromParent();
+    BranchInst::Create(Entry, Next, BoolUndef, Prev);
+  } else {
+    DT->changeImmediateDominator(Next, Entry);
+  }
+
+  // Let node exit(s) point to next block
+  if (Node->isSubRegion()) {
+    Region *SubRegion = Node->getNodeAs<Region>();
+    BasicBlock *Exit = SubRegion->getExit();
+
+    // Find all the edges from the sub region to the exit
+    BBVector ToDo;
+    for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
+      if (SubRegion->contains(*I))
+        ToDo.push_back(*I);
+    }
+
+    // Modify the edges to point to the new flow block
+    for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) {
+      delPhiValues(*I, Exit);
+      TerminatorInst *Term = (*I)->getTerminator();
+      Term->replaceUsesOfWith(Exit, Next);
+    }
+
+    // Update the region info
+    SubRegion->replaceExit(Next);
+
+  } else {
+    BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+    killTerminator(BB);
+    BranchInst::Create(Next, BB);
+
+    if (BB == LoopEnd)
+      LoopEnd = 0;
+  }
+
+  return Next;
+}
+
+/// Destroy node order and visited map, build up flow order instead.
+/// After this function control flow looks like it should be, but
+/// branches only have undefined conditions.
+void AMDGPUStructurizeCFG::createFlow() {
+
+  DeletedPhis.clear();
+
+  BasicBlock *Prev = Order.pop_back_val()->getEntry();
+  assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
+  Visited.erase(Prev);
+
+  if (LoopStart == Prev) {
+    // Loop starts at entry, split entry so that we can predicate it
+    BasicBlock::iterator Insert = Prev->getFirstInsertionPt();
+    BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName);
+    DT->addNewBlock(Split, Prev);
+    ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
+    Predicates[Split] = Predicates[Prev];
+    Order.push_back(ParentRegion->getBBNode(Split));
+    LoopPred[Prev] = BoolTrue;
+
+  } else if (LoopStart == Order.back()->getEntry()) {
+    // Loop starts behind entry, split entry so that we can jump to it
+    Instruction *Term = Prev->getTerminator();
+    BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName);
+    DT->addNewBlock(Split, Prev);
+    ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
+    Prev = Split;
+  }
+
+  killTerminator(Prev);
+  FlowsInserted.clear();
+  FlowsInserted.push_back(Prev);
+
+  while (!Order.empty()) {
+    RegionNode *Node = Order.pop_back_val();
+    Visited.erase(Node->getEntry());
+    Prev = wireFlowBlock(Prev, Node);
+    if (LoopStart && !LoopEnd) {
+      // Create an extra loop end node
+      LoopEnd = Prev;
+      Prev = getNextFlow(LoopEnd);
+      BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd);
+      addPhiValues(LoopEnd, LoopStart);
+    }
+  }
+
+  BasicBlock *Exit = ParentRegion->getExit();
+  BranchInst::Create(Exit, Prev);
+  addPhiValues(Prev, Exit);
+  if (DT->dominates(ParentRegion->getEntry(), Exit))
+    DT->changeImmediateDominator(Exit, Prev);
+
+  if (LoopStart && LoopEnd) {
+    BBVector::iterator FI = std::find(FlowsInserted.begin(),
+                                      FlowsInserted.end(),
+                                      LoopStart);
+    for (; *FI != LoopEnd; ++FI) {
+      addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
+    }
+  }
+
+  assert(Order.empty());
+  assert(Visited.empty());
+  assert(DeletedPhis.empty());
+}
+
+/// \brief Insert the missing branch conditions
+void AMDGPUStructurizeCFG::insertConditions() {
+
+  SSAUpdater PhiInserter;
+
+  for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end();
+       FI != FE; ++FI) {
+
+    BranchInst *Term = cast<BranchInst>((*FI)->getTerminator());
+    if (Term->isUnconditional())
+      continue;
+
+    PhiInserter.Initialize(Boolean, "");
+    PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse);
+
+    BasicBlock *Succ = Term->getSuccessor(0);
+    BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ];
+    for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
+         PI != PE; ++PI) {
+
+      PhiInserter.AddAvailableValue(PI->first, PI->second);
+    }
+
+    Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI));
+  }
+}
+
+/// Handle a rare case where the disintegrated nodes instructions
+/// no longer dominate all their uses. Not sure if this is really nessasary
+void AMDGPUStructurizeCFG::rebuildSSA() {
+
+  SSAUpdater Updater;
+  for (Region::block_iterator I = ParentRegion->block_begin(),
+                              E = ParentRegion->block_end();
+       I != E; ++I) {
+
+    BasicBlock *BB = *I;
+    for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+         II != IE; ++II) {
+
+      bool Initialized = false;
+      for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
+
+        Next = I->getNext();
+
+        Instruction *User = cast<Instruction>(I->getUser());
+        if (User->getParent() == BB) {
+          continue;
+
+        } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+          if (UserPN->getIncomingBlock(*I) == BB)
+            continue;
+        }
+
+        if (DT->dominates(II, User))
+          continue;
+
+        if (!Initialized) {
+          Value *Undef = UndefValue::get(II->getType());
+          Updater.Initialize(II->getType(), "");
+          Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
+          Updater.AddAvailableValue(BB, II);
+          Initialized = true;
+        }
+        Updater.RewriteUseAfterInsertions(*I);
+      }
+    }
+  }
+}
+
+/// \brief Run the transformation for each region found
+bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
+
+  if (R->isTopLevelRegion())
+    return false;
+
+  Func = R->getEntry()->getParent();
+  ParentRegion = R;
+
+  DT = &getAnalysis<DominatorTree>();
+
+  orderNodes();
+  collectInfos();
+  createFlow();
+  insertConditions();
+  rebuildSSA();
+
+  Order.clear();
+  Visited.clear();
+  Predicates.clear();
+  DeletedPhis.clear();
+  FlowsInserted.clear();
+
+  return true;
+}
+
+/// \brief Create the pass
+Pass *llvm::createAMDGPUStructurizeCFGPass() {
+  return new AMDGPUStructurizeCFG();
+}
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index 98a3064f7e..d09dc2efff 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -91,6 +91,11 @@ TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
 
 bool
 AMDGPUPassConfig::addPreISel() {
+  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
+  if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
+    addPass(createAMDGPUStructurizeCFGPass());
+    addPass(createSIAnnotateControlFlowPass());
+  }
   return false;
 }
 
@@ -107,9 +112,6 @@ bool AMDGPUPassConfig::addPreRegAlloc() {
     addPass(createSIAssignInterpRegsPass(*TM));
   }
   addPass(createAMDGPUConvertToISAPass(*TM));
-  if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-    addPass(createSIFixSGPRLivenessPass(*TM));
-  }
   return false;
 }
 
@@ -124,11 +126,10 @@ bool AMDGPUPassConfig::addPreSched2() {
 }
 
 bool AMDGPUPassConfig::addPreEmitPass() {
-  addPass(createAMDGPUCFGPreparationPass(*TM));
-  addPass(createAMDGPUCFGStructurizerPass(*TM));
-
   const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
   if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
+    addPass(createAMDGPUCFGPreparationPass(*TM));
+    addPass(createAMDGPUCFGStructurizerPass(*TM));
     addPass(createR600ExpandSpecialInstrsPass(*TM));
     addPass(&FinalizeMachineBundlesID);
   } else {
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index 1f276dc570..568d281e63 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -2596,7 +2596,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
     case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
     case AMDGPU::BRANCH_COND_i32:
     case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
-    case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
     default:
       assert(0 && "internal error");
     }
@@ -2608,7 +2607,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
     case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
     case AMDGPU::BRANCH_COND_i32:
     case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
-    case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
     default:
       assert(0 && "internal error");
     }
@@ -2658,8 +2656,6 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
         return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
       case AMDGPU::BRANCH_COND_i32:
       case AMDGPU::BRANCH_COND_f32:
-      case AMDGPU::SI_IF_NZ:
-      case AMDGPU::SI_IF_Z:
       break;
     default:
       return false;
diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td
index ac6745148e..e969bbf8ca 100644
--- a/lib/Target/R600/AMDILInstrInfo.td
+++ b/lib/Target/R600/AMDILInstrInfo.td
@@ -206,68 +206,3 @@ multiclass BranchInstr2<string name> {
 // Intrinsics support
 //===--------------------------------------------------------------------===//
 include "AMDILIntrinsics.td"
-
-//===--------------------------------------------------------------------===//
-// Instructions support
-//===--------------------------------------------------------------------===//
-//===---------------------------------------------------------------------===//
-// Custom Inserter for Branches and returns, this eventually will be a
-// seperate pass
-//===---------------------------------------------------------------------===//
-let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
-  def BRANCH : ILFormat<(outs), (ins brtarget:$target),
-      "; Pseudo unconditional branch instruction",
-      [(br bb:$target)]>;
-  defm BRANCH_COND : BranchConditional<IL_brcond>;
-}
-
-//===---------------------------------------------------------------------===//
-// Flow and Program control Instructions
-//===---------------------------------------------------------------------===//
-let isTerminator=1 in {
-  def SWITCH      : ILFormat< (outs), (ins GPRI32:$src),
-  !strconcat("SWITCH", " $src"), []>;
-  def CASE        : ILFormat< (outs), (ins GPRI32:$src),
-      !strconcat("CASE", " $src"), []>;
-  def BREAK       : ILFormat< (outs), (ins),
-      "BREAK", []>;
-  def CONTINUE    : ILFormat< (outs), (ins),
-      "CONTINUE", []>;
-  def DEFAULT     : ILFormat< (outs), (ins),
-      "DEFAULT", []>;
-  def ELSE        : ILFormat< (outs), (ins),
-      "ELSE", []>;
-  def ENDSWITCH   : ILFormat< (outs), (ins),
-      "ENDSWITCH", []>;
-  def ENDMAIN     : ILFormat< (outs), (ins),
-      "ENDMAIN", []>;
-  def END         : ILFormat< (outs), (ins),
-      "END", []>;
-  def ENDFUNC     : ILFormat< (outs), (ins),
-      "ENDFUNC", []>;
-  def ENDIF       : ILFormat< (outs), (ins),
-      "ENDIF", []>;
-  def WHILELOOP   : ILFormat< (outs), (ins),
-      "WHILE", []>;
-  def ENDLOOP     : ILFormat< (outs), (ins),
-      "ENDLOOP", []>;
-  def FUNC        : ILFormat< (outs), (ins),
-      "FUNC", []>;
-  def RETDYN      : ILFormat< (outs), (ins),
-      "RET_DYN", []>;
-  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-  defm IF_LOGICALNZ  : BranchInstr<"IF_LOGICALNZ">;
-  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-  defm IF_LOGICALZ   : BranchInstr<"IF_LOGICALZ">;
-  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-  defm BREAK_LOGICALNZ : BranchInstr<"BREAK_LOGICALNZ">;
-  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-  defm BREAK_LOGICALZ : BranchInstr<"BREAK_LOGICALZ">;
-  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-  defm CONTINUE_LOGICALNZ : BranchInstr<"CONTINUE_LOGICALNZ">;
-  // This opcode has custom swizzle pattern encoded in Swizzle Encoder
-  defm CONTINUE_LOGICALZ : BranchInstr<"CONTINUE_LOGICALZ">;
-  defm IFC         : BranchInstr2<"IFC">;
-  defm BREAKC      : BranchInstr2<"BREAKC">;
-  defm CONTINUEC   : BranchInstr2<"CONTINUEC">;
-}
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 757b733522..ce0b56bdf0 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -27,6 +27,7 @@ add_llvm_target(R600CodeGen
   AMDGPUAsmPrinter.cpp
   AMDGPUMCInstLower.cpp
   AMDGPUSubtarget.cpp
+  AMDGPUStructurizeCFG.cpp
   AMDGPUTargetMachine.cpp
   AMDGPUISelLowering.cpp
author	Tom Stellard <thomas.stellard@amd.com>	2012-12-19 22:10:31 +0000
committer	Tom Stellard <thomas.stellard@amd.com>	2012-12-19 22:10:31 +0000
commit	6b7d99d47321ebb478b22afd2e317fe89d2149db (patch)
tree	548126753462b8985bf846b022d39b2c51e96f96 /lib
parent	6eebe47060eec7e3a4ae95d4b4835869108f9c07 (diff)