From cbf24b4e58c2f621f480883c5bb1f2f2b2b8d497 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Wed, 20 Jun 2012 03:42:09 +0000
Subject: Move the implementation of LoopInfo into LoopInfoImpl.h.

The implementation only needs inclusion from LoopInfo.cpp and
MachineLoopInfo.cpp. Clients of the interface should only include the
interface. This makes the interface readable and speeds up rebuilds
after modifying the implementation.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158787 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineLoopInfo.cpp | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

(limited to 'lib/CodeGen/MachineLoopInfo.cpp')

diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 189cb2ba5d..b5d5ad9be9 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -9,7 +9,7 @@
 //
 // This file defines the MachineLoopInfo class that is used to identify natural
 // loops and determine the loop depth of various nodes of the CFG.  Note that
-// the loops identified may actually be several natural loops that share the 
+// the loops identified may actually be several natural loops that share the
 // same header node... not just a single natural loop.
 //
 //===----------------------------------------------------------------------===//
@@ -17,17 +17,13 @@
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/LoopInfoImpl.h"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
 
-namespace llvm {
-#define MLB class LoopBase<MachineBasicBlock, MachineLoop>
-TEMPLATE_INSTANTIATION(MLB);
-#undef MLB
-#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop>
-TEMPLATE_INSTANTIATION(MLIB);
-#undef MLIB
-}
+// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops.
+template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
+template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
 
 char MachineLoopInfo::ID = 0;
 INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
-- 
cgit v1.2.3-70-g09d2


From 37aa33bc11c01a7142bfa2428a5a4d219b07b6c3 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Wed, 20 Jun 2012 05:23:33 +0000
Subject: A new algorithm for computing LoopInfo. Temporarily disabled.

-stable-loops enables a new algorithm for generating the Loop
forest. It differs from the original algorithm in a few respects:

- Not determined by use-list order.
- Initially guarantees RPO order of block and subloops.
- Linear in the number of CFG edges.
- Nonrecursive.

I didn't want to change the LoopInfo API yet, so the block lists are
still inclusive. This seems strange to me, and it means that building
LoopInfo is not strictly linear, but it may not be a problem in
practice. At least the block lists start out in RPO order now. In the
future we may add an attribute or wrapper analysis that allows other
passes to assume RPO order.

The primary motivation of this work was not to optimize LoopInfo, but
to allow reproducing performance issues by decomposing the compilation
stages. I'm often unable to do this with the current LoopInfo, because
the loop tree order determines Loop pass order. Serializing the IR
tends to invert the order, which reverses the optimization order. This
makes it nearly impossible to debug interdependent loop optimizations
such as LSR.

I also believe this will provide more stable performance results across time.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158790 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Analysis/LoopInfo.h     |   8 ++
 include/llvm/Analysis/LoopInfoImpl.h | 199 +++++++++++++++++++++++++++++++++++
 lib/Analysis/LoopInfo.cpp            |   9 +-
 lib/CodeGen/MachineLoopInfo.cpp      |  10 +-
 4 files changed, 224 insertions(+), 2 deletions(-)

(limited to 'lib/CodeGen/MachineLoopInfo.cpp')

diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 03ca316cfc..329650e81f 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -97,6 +97,9 @@ public:
   BlockT *getHeader() const { return Blocks.front(); }
   LoopT *getParentLoop() const { return ParentLoop; }
 
+  /// setParentLoop is a raw interface for bypassing addChildLoop.
+  void setParentLoop(LoopT *L) { ParentLoop = L; }
+
   /// contains - Return true if the specified loop is contained within in
   /// this loop.
   ///
@@ -122,6 +125,7 @@ public:
   /// iterator/begin/end - Return the loops contained entirely within this loop.
   ///
   const std::vector<LoopT *> &getSubLoops() const { return SubLoops; }
+  std::vector<LoopT *> &getSubLoopsVector() { return SubLoops; }
   typedef typename std::vector<LoopT *>::const_iterator iterator;
   iterator begin() const { return SubLoops.begin(); }
   iterator end() const { return SubLoops.end(); }
@@ -130,6 +134,7 @@ public:
   /// getBlocks - Get a list of the basic blocks which make up this loop.
   ///
   const std::vector<BlockT*> &getBlocks() const { return Blocks; }
+  std::vector<BlockT*> &getBlocksVector() { return Blocks; }
   typedef typename std::vector<BlockT*>::const_iterator block_iterator;
   block_iterator block_begin() const { return Blocks.begin(); }
   block_iterator block_end() const { return Blocks.end(); }
@@ -528,6 +533,9 @@ public:
   /// inserted into L instead.
   void InsertLoopInto(LoopT *L, LoopT *Parent);
 
+  /// Create the loop forest using a stable algorithm.
+  void Analyze(DominatorTreeBase<BlockT> &DomTree);
+
   // Debugging
 
   void print(raw_ostream &OS) const;
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index ab83bb12fe..e58654ab87 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -16,6 +16,7 @@
 #define LLVM_ANALYSIS_LOOP_INFO_IMPL_H
 
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/ADT/PostOrderIterator.h"
 
 namespace llvm {
 
@@ -531,6 +532,204 @@ void LoopInfoBase<BlockT, LoopT>::InsertLoopInto(LoopT *L, LoopT *Parent) {
   L->ParentLoop = Parent;
 }
 
+//===----------------------------------------------------------------------===//
+/// Stable LoopInfo Analysis - Build a loop tree using stable iterators so the
+/// result does / not depend on use list (block predecessor) order.
+///
+
+/// Discover a subloop with the specified backedges such that: All blocks within
+/// this loop are mapped to this loop or a subloop. And all subloops within this
+/// loop have their parent loop set to this loop or a subloop.
+template<class BlockT, class LoopT>
+static void discoverAndMapSubloop(LoopT *L, ArrayRef<BlockT*> Backedges,
+                                  LoopInfoBase<BlockT, LoopT> *LI,
+                                  DominatorTreeBase<BlockT> &DomTree) {
+  typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
+
+  unsigned NumBlocks = 0;
+  unsigned NumSubloops = 0;
+
+  // Perform a backward CFG traversal using a worklist.
+  std::vector<BlockT *> ReverseCFGWorklist(Backedges.begin(), Backedges.end());
+  while (!ReverseCFGWorklist.empty()) {
+    BlockT *PredBB = ReverseCFGWorklist.back();
+    ReverseCFGWorklist.pop_back();
+
+    LoopT *Subloop = LI->getLoopFor(PredBB);
+    if (!Subloop) {
+      if (!DomTree.isReachableFromEntry(PredBB))
+        continue;
+
+      // This is an undiscovered block. Map it to the current loop.
+      LI->changeLoopFor(PredBB, L);
+      ++NumBlocks;
+      if (PredBB == L->getHeader())
+          continue;
+      // Push all block predecessors on the worklist.
+      ReverseCFGWorklist.insert(ReverseCFGWorklist.end(),
+                                InvBlockTraits::child_begin(PredBB),
+                                InvBlockTraits::child_end(PredBB));
+    }
+    else {
+      // This is a discovered block. Find its outermost discovered loop.
+      while (LoopT *Parent = Subloop->getParentLoop())
+        Subloop = Parent;
+
+      // If it is already discovered to be a subloop of this loop, continue.
+      if (Subloop == L)
+        continue;
+
+      // Discover a subloop of this loop.
+      Subloop->setParentLoop(L);
+      ++NumSubloops;
+      NumBlocks += Subloop->getBlocks().capacity();
+      PredBB = Subloop->getHeader();
+      // Continue traversal along predecessors that are not loop-back edges from
+      // within this subloop tree itself. Note that a predecessor may directly
+      // reach another subloop that is not yet discovered to be a subloop of
+      // this loop, which we must traverse.
+      for (typename InvBlockTraits::ChildIteratorType PI =
+             InvBlockTraits::child_begin(PredBB),
+             PE = InvBlockTraits::child_end(PredBB); PI != PE; ++PI) {
+        if (LI->getLoopFor(*PI) != Subloop)
+          ReverseCFGWorklist.push_back(*PI);
+      }
+    }
+  }
+  L->getSubLoopsVector().reserve(NumSubloops);
+  L->getBlocksVector().reserve(NumBlocks);
+}
+
+namespace {
+/// Populate all loop data in a stable order during a single forward DFS.
+template<class BlockT, class LoopT>
+class PopulateLoopsDFS {
+  typedef GraphTraits<BlockT*> BlockTraits;
+  typedef typename BlockTraits::ChildIteratorType SuccIterTy;
+
+  LoopInfoBase<BlockT, LoopT> *LI;
+  DenseSet<const BlockT *> VisitedBlocks;
+  std::vector<std::pair<BlockT*, SuccIterTy> > DFSStack;
+
+public:
+  PopulateLoopsDFS(LoopInfoBase<BlockT, LoopT> *li):
+    LI(li) {}
+
+  void traverse(BlockT *EntryBlock);
+
+protected:
+  void reverseInsertIntoLoop(BlockT *Block);
+
+  BlockT *dfsSource() { return DFSStack.back().first; }
+  SuccIterTy &dfsSucc() { return DFSStack.back().second; }
+  SuccIterTy dfsSuccEnd() { return BlockTraits::child_end(dfsSource()); }
+
+  void pushBlock(BlockT *Block) {
+    DFSStack.push_back(std::make_pair(Block, BlockTraits::child_begin(Block)));
+  }
+};
+} // anonymous
+
+/// Top-level driver for the forward DFS within the loop.
+template<class BlockT, class LoopT>
+void PopulateLoopsDFS<BlockT, LoopT>::traverse(BlockT *EntryBlock) {
+  pushBlock(EntryBlock);
+  VisitedBlocks.insert(EntryBlock);
+  while (!DFSStack.empty()) {
+    // Traverse the leftmost path as far as possible.
+    while (dfsSucc() != dfsSuccEnd()) {
+      BlockT *BB = *dfsSucc();
+      ++dfsSucc();
+      if (!VisitedBlocks.insert(BB).second)
+        continue;
+
+      // Push the next DFS successor onto the stack.
+      pushBlock(BB);
+    }
+    // Visit the top of the stack in postorder and backtrack.
+    reverseInsertIntoLoop(dfsSource());
+    DFSStack.pop_back();
+  }
+}
+
+/// Add a single Block to its ancestor loops in PostOrder. If the block is a
+/// subloop header, add the subloop to its parent in PostOrder, then reverse the
+/// Block and Subloop vectors of the now complete subloop to achieve RPO.
+template<class BlockT, class LoopT>
+void PopulateLoopsDFS<BlockT, LoopT>::reverseInsertIntoLoop(BlockT *Block) {
+  for (LoopT *Subloop = LI->getLoopFor(Block);
+       Subloop; Subloop = Subloop->getParentLoop()) {
+
+    if (Block != Subloop->getHeader()) {
+      Subloop->getBlocksVector().push_back(Block);
+      continue;
+    }
+    if (Subloop->getParentLoop())
+      Subloop->getParentLoop()->getSubLoopsVector().push_back(Subloop);
+    else
+      LI->addTopLevelLoop(Subloop);
+
+    // For convenience, Blocks and Subloops are inserted in postorder. Reverse
+    // the lists, except for the loop header, which is always at the beginning.
+    std::reverse(Subloop->getBlocksVector().begin()+1,
+                 Subloop->getBlocksVector().end());
+    std::reverse(Subloop->getSubLoopsVector().begin(),
+                 Subloop->getSubLoopsVector().end());
+  }
+}
+
+/// Analyze LoopInfo discovers loops during a postorder DominatorTree traversal
+/// interleaved with backward CFG traversals within each subloop
+/// (discoverAndMapSubloop). The backward traversal skips inner subloops, so
+/// this part of the algorithm is linear in the number of CFG edges. Subloop and
+/// Block vectors are then populated during a single forward CFG traversal
+/// (PopulateLoopDFS).
+///
+/// During the two CFG traversals each block is seen three times:
+/// 1) Discovered and mapped by a reverse CFG traversal.
+/// 2) Visited during a forward DFS CFG traversal.
+/// 3) Reverse-inserted in the loop in postorder following forward DFS.
+///
+/// The Block vectors are inclusive, so step 3 requires loop-depth number of
+/// insertions per block.
+template<class BlockT, class LoopT>
+void LoopInfoBase<BlockT, LoopT>::
+Analyze(DominatorTreeBase<BlockT> &DomTree) {
+
+  // Postorder traversal of the dominator tree.
+  DomTreeNodeBase<BlockT>* DomRoot = DomTree.getRootNode();
+  for (po_iterator<DomTreeNodeBase<BlockT>*> DomIter = po_begin(DomRoot),
+         DomEnd = po_end(DomRoot); DomIter != DomEnd; ++DomIter) {
+
+    BlockT *Header = DomIter->getBlock();
+    SmallVector<BlockT *, 4> Backedges;
+
+    // Check each predecessor of the potential loop header.
+    typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
+    for (typename InvBlockTraits::ChildIteratorType PI =
+           InvBlockTraits::child_begin(Header),
+           PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) {
+
+      BlockT *Backedge = *PI;
+
+      // If Header dominates predBB, this is a new loop. Collect the backedges.
+      if (DomTree.dominates(Header, Backedge)
+          && DomTree.isReachableFromEntry(Backedge)) {
+        Backedges.push_back(Backedge);
+      }
+    }
+    // Perform a backward CFG traversal to discover and map blocks in this loop.
+    if (!Backedges.empty()) {
+      LoopT *L = new LoopT(Header);
+      discoverAndMapSubloop(L, ArrayRef<BlockT*>(Backedges), this, DomTree);
+    }
+  }
+  // Perform a single forward CFG traversal to populate block and subloop
+  // vectors for all loops.
+  PopulateLoopsDFS<BlockT, LoopT> DFS(this);
+  DFS.traverse(DomRoot->getBlock());
+}
+
 // Debugging
 template<class BlockT, class LoopT>
 void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const {
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index e46be981fc..5d5f5066b3 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -44,6 +44,10 @@ static cl::opt<bool,true>
 VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
                 cl::desc("Verify loop info (time consuming)"));
 
+static cl::opt<bool>
+StableLoopInfo("stable-loops", cl::Hidden, cl::init(false),
+               cl::desc("Compute a stable loop tree."));
+
 char LoopInfo::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
 INITIALIZE_PASS_DEPENDENCY(DominatorTree)
@@ -512,7 +516,10 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
 //
 bool LoopInfo::runOnFunction(Function &) {
   releaseMemory();
-  LI.Calculate(getAnalysis<DominatorTree>().getBase());    // Update
+  if (StableLoopInfo)
+    LI.Analyze(getAnalysis<DominatorTree>().getBase());
+  else
+    LI.Calculate(getAnalysis<DominatorTree>().getBase());    // Update
   return false;
 }
 
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index b5d5ad9be9..b7298e48ee 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Analysis/LoopInfoImpl.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
 
@@ -25,6 +26,10 @@ using namespace llvm;
 template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
 template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
 
+static cl::opt<bool>
+StableLoopInfo("stable-machine-loops", cl::Hidden, cl::init(false),
+               cl::desc("Compute a stable loop tree."));
+
 char MachineLoopInfo::ID = 0;
 INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
                 "Machine Natural Loop Construction", true, true)
@@ -36,7 +41,10 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
 
 bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
   releaseMemory();
-  LI.Calculate(getAnalysis<MachineDominatorTree>().getBase());    // Update
+  if (StableLoopInfo)
+    LI.Analyze(getAnalysis<MachineDominatorTree>().getBase());
+  else
+    LI.Calculate(getAnalysis<MachineDominatorTree>().getBase());    // Update
   return false;
 }
 
-- 
cgit v1.2.3-70-g09d2


From c9b1e25493b393013b28e5d457f2fb2845a4dd9f Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Tue, 26 Jun 2012 04:11:38 +0000
Subject: Enable the new LoopInfo algorithm by default.

The primary advantage is that loop optimizations will be applied in a
stable order. This helps debugging and unit test creation. It is also
a better overall implementation without pathologically bad performance
on deep functions.

On large functions (llvm-stress --size=200000 | opt -loops)
Before: 0.1263s
After:  0.0225s

On deep functions (after tweaking llvm-stress, thanks Nadav):
Before: 0.2281s
After:  0.0227s

See r158790 for more comments.

The loop tree is now consistently generated in forward order, but loop
passes are applied in reverse order over the program. If we have a
loop optimization that prefers forward order, that can easily be
achieved by adding a different type of LoopPassManager.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159183 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Analysis/LoopInfo.h                   |  24 ++-
 include/llvm/Analysis/LoopInfoImpl.h               | 179 ---------------------
 lib/Analysis/LoopInfo.cpp                          |   9 +-
 lib/Analysis/LoopPass.cpp                          |  10 +-
 lib/CodeGen/MachineLoopInfo.cpp                    |  10 +-
 test/CodeGen/X86/multiple-loop-post-inc.ll         |   8 +-
 test/Transforms/LCSSA/unused-phis.ll               |   4 +-
 .../LoopUnswitch/2011-11-18-SimpleSwitch.ll        |   8 +-
 .../2011-11-18-TwoSwitches-Threshold.ll            |  10 +-
 .../LoopUnswitch/2011-11-18-TwoSwitches.ll         |  34 ++--
 10 files changed, 52 insertions(+), 244 deletions(-)

(limited to 'lib/CodeGen/MachineLoopInfo.cpp')

diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 14d87e0034..eeb482d82a 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -127,8 +127,12 @@ public:
   const std::vector<LoopT *> &getSubLoops() const { return SubLoops; }
   std::vector<LoopT *> &getSubLoopsVector() { return SubLoops; }
   typedef typename std::vector<LoopT *>::const_iterator iterator;
+  typedef typename std::vector<LoopT *>::const_reverse_iterator
+    reverse_iterator;
   iterator begin() const { return SubLoops.begin(); }
   iterator end() const { return SubLoops.end(); }
+  reverse_iterator rbegin() const { return SubLoops.rbegin(); }
+  reverse_iterator rend() const { return SubLoops.rend(); }
   bool empty() const { return SubLoops.empty(); }
 
   /// getBlocks - Get a list of the basic blocks which make up this loop.
@@ -431,8 +435,12 @@ public:
   /// function.
   ///
   typedef typename std::vector<LoopT *>::const_iterator iterator;
+  typedef typename std::vector<LoopT *>::const_reverse_iterator
+    reverse_iterator;
   iterator begin() const { return TopLevelLoops.begin(); }
   iterator end() const { return TopLevelLoops.end(); }
+  reverse_iterator rbegin() const { return TopLevelLoops.rbegin(); }
+  reverse_iterator rend() const { return TopLevelLoops.rend(); }
   bool empty() const { return TopLevelLoops.empty(); }
 
   /// getLoopFor - Return the inner most loop that BB lives in.  If a basic
@@ -525,19 +533,6 @@ public:
     return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop);
   }
 
-  void Calculate(DominatorTreeBase<BlockT> &DT);
-
-  LoopT *ConsiderForLoop(BlockT *BB, DominatorTreeBase<BlockT> &DT);
-
-  /// MoveSiblingLoopInto - This method moves the NewChild loop to live inside
-  /// of the NewParent Loop, instead of being a sibling of it.
-  void MoveSiblingLoopInto(LoopT *NewChild, LoopT *NewParent);
-
-  /// InsertLoopInto - This inserts loop L into the specified parent loop.  If
-  /// the parent loop contains a loop which should contain L, the loop gets
-  /// inserted into L instead.
-  void InsertLoopInto(LoopT *L, LoopT *Parent);
-
   /// Create the loop forest using a stable algorithm.
   void Analyze(DominatorTreeBase<BlockT> &DomTree);
 
@@ -570,8 +565,11 @@ public:
   /// function.
   ///
   typedef LoopInfoBase<BasicBlock, Loop>::iterator iterator;
+  typedef LoopInfoBase<BasicBlock, Loop>::reverse_iterator reverse_iterator;
   inline iterator begin() const { return LI.begin(); }
   inline iterator end() const { return LI.end(); }
+  inline reverse_iterator rbegin() const { return LI.rbegin(); }
+  inline reverse_iterator rend() const { return LI.rend(); }
   bool empty() const { return LI.empty(); }
 
   /// getLoopFor - Return the inner most loop that BB lives in.  If a basic
diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h
index 1be717cb71..c07fbf7aa8 100644
--- a/include/llvm/Analysis/LoopInfoImpl.h
+++ b/include/llvm/Analysis/LoopInfoImpl.h
@@ -353,185 +353,6 @@ void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth) const {
     (*I)->print(OS, Depth+2);
 }
 
-//===----------------------------------------------------------------------===//
-/// LoopInfo - This class builds and contains all of the top level loop
-/// structures in the specified function.
-///
-
-template<class BlockT, class LoopT>
-void LoopInfoBase<BlockT, LoopT>::Calculate(DominatorTreeBase<BlockT> &DT) {
-  BlockT *RootNode = DT.getRootNode()->getBlock();
-
-  for (df_iterator<BlockT*> NI = df_begin(RootNode),
-         NE = df_end(RootNode); NI != NE; ++NI)
-    if (LoopT *L = ConsiderForLoop(*NI, DT))
-      TopLevelLoops.push_back(L);
-}
-
-template<class BlockT, class LoopT>
-LoopT *LoopInfoBase<BlockT, LoopT>::
-ConsiderForLoop(BlockT *BB, DominatorTreeBase<BlockT> &DT) {
-  if (BBMap.count(BB)) return 0; // Haven't processed this node?
-
-  std::vector<BlockT *> TodoStack;
-
-  // Scan the predecessors of BB, checking to see if BB dominates any of
-  // them.  This identifies backedges which target this node...
-  typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
-  for (typename InvBlockTraits::ChildIteratorType I =
-         InvBlockTraits::child_begin(BB), E = InvBlockTraits::child_end(BB);
-       I != E; ++I) {
-    typename InvBlockTraits::NodeType *N = *I;
-    // If BB dominates its predecessor...
-    if (DT.dominates(BB, N) && DT.isReachableFromEntry(N))
-      TodoStack.push_back(N);
-  }
-
-  if (TodoStack.empty()) return 0;  // No backedges to this block...
-
-  // Create a new loop to represent this basic block...
-  LoopT *L = new LoopT(BB);
-  BBMap[BB] = L;
-
-  while (!TodoStack.empty()) {  // Process all the nodes in the loop
-    BlockT *X = TodoStack.back();
-    TodoStack.pop_back();
-
-    if (!L->contains(X) &&         // As of yet unprocessed??
-        DT.isReachableFromEntry(X)) {
-      // Check to see if this block already belongs to a loop.  If this occurs
-      // then we have a case where a loop that is supposed to be a child of
-      // the current loop was processed before the current loop.  When this
-      // occurs, this child loop gets added to a part of the current loop,
-      // making it a sibling to the current loop.  We have to reparent this
-      // loop.
-      if (LoopT *SubLoop =
-          const_cast<LoopT *>(getLoopFor(X)))
-        if (SubLoop->getHeader() == X && isNotAlreadyContainedIn(SubLoop, L)){
-          // Remove the subloop from its current parent...
-          assert(SubLoop->ParentLoop && SubLoop->ParentLoop != L);
-          LoopT *SLP = SubLoop->ParentLoop;  // SubLoopParent
-          typename std::vector<LoopT *>::iterator I =
-            std::find(SLP->SubLoops.begin(), SLP->SubLoops.end(), SubLoop);
-          assert(I != SLP->SubLoops.end() &&"SubLoop not a child of parent?");
-          SLP->SubLoops.erase(I);   // Remove from parent...
-
-          // Add the subloop to THIS loop...
-          SubLoop->ParentLoop = L;
-          L->SubLoops.push_back(SubLoop);
-        }
-
-      // Normal case, add the block to our loop...
-      L->Blocks.push_back(X);
-
-      typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
-
-      // Add all of the predecessors of X to the end of the work stack...
-      TodoStack.insert(TodoStack.end(), InvBlockTraits::child_begin(X),
-                       InvBlockTraits::child_end(X));
-    }
-  }
-
-  // If there are any loops nested within this loop, create them now!
-  for (typename std::vector<BlockT*>::iterator I = L->Blocks.begin(),
-         E = L->Blocks.end(); I != E; ++I)
-    if (LoopT *NewLoop = ConsiderForLoop(*I, DT)) {
-      L->SubLoops.push_back(NewLoop);
-      NewLoop->ParentLoop = L;
-    }
-
-  // Add the basic blocks that comprise this loop to the BBMap so that this
-  // loop can be found for them.
-  //
-  for (typename std::vector<BlockT*>::iterator I = L->Blocks.begin(),
-         E = L->Blocks.end(); I != E; ++I)
-    BBMap.insert(std::make_pair(*I, L));
-
-  // Now that we have a list of all of the child loops of this loop, check to
-  // see if any of them should actually be nested inside of each other.  We
-  // can accidentally pull loops our of their parents, so we must make sure to
-  // organize the loop nests correctly now.
-  {
-    std::map<BlockT *, LoopT *> ContainingLoops;
-    for (unsigned i = 0; i != L->SubLoops.size(); ++i) {
-      LoopT *Child = L->SubLoops[i];
-      assert(Child->getParentLoop() == L && "Not proper child loop?");
-
-      if (LoopT *ContainingLoop = ContainingLoops[Child->getHeader()]) {
-        // If there is already a loop which contains this loop, move this loop
-        // into the containing loop.
-        MoveSiblingLoopInto(Child, ContainingLoop);
-        --i;  // The loop got removed from the SubLoops list.
-      } else {
-        // This is currently considered to be a top-level loop.  Check to see
-        // if any of the contained blocks are loop headers for subloops we
-        // have already processed.
-        for (unsigned b = 0, e = Child->Blocks.size(); b != e; ++b) {
-          LoopT *&BlockLoop = ContainingLoops[Child->Blocks[b]];
-          if (BlockLoop == 0) {   // Child block not processed yet...
-            BlockLoop = Child;
-          } else if (BlockLoop != Child) {
-            LoopT *SubLoop = BlockLoop;
-            // Reparent all of the blocks which used to belong to BlockLoops
-            for (unsigned j = 0, f = SubLoop->Blocks.size(); j != f; ++j)
-              ContainingLoops[SubLoop->Blocks[j]] = Child;
-
-            // There is already a loop which contains this block, that means
-            // that we should reparent the loop which the block is currently
-            // considered to belong to to be a child of this loop.
-            MoveSiblingLoopInto(SubLoop, Child);
-            --i;  // We just shrunk the SubLoops list.
-          }
-        }
-      }
-    }
-  }
-
-  return L;
-}
-
-/// MoveSiblingLoopInto - This method moves the NewChild loop to live inside
-/// of the NewParent Loop, instead of being a sibling of it.
-template<class BlockT, class LoopT>
-void LoopInfoBase<BlockT, LoopT>::
-MoveSiblingLoopInto(LoopT *NewChild, LoopT *NewParent) {
-  LoopT *OldParent = NewChild->getParentLoop();
-  assert(OldParent && OldParent == NewParent->getParentLoop() &&
-         NewChild != NewParent && "Not sibling loops!");
-
-  // Remove NewChild from being a child of OldParent
-  typename std::vector<LoopT *>::iterator I =
-    std::find(OldParent->SubLoops.begin(), OldParent->SubLoops.end(),
-              NewChild);
-  assert(I != OldParent->SubLoops.end() && "Parent fields incorrect??");
-  OldParent->SubLoops.erase(I);   // Remove from parent's subloops list
-  NewChild->ParentLoop = 0;
-
-  InsertLoopInto(NewChild, NewParent);
-}
-
-/// InsertLoopInto - This inserts loop L into the specified parent loop.  If
-/// the parent loop contains a loop which should contain L, the loop gets
-/// inserted into L instead.
-template<class BlockT, class LoopT>
-void LoopInfoBase<BlockT, LoopT>::InsertLoopInto(LoopT *L, LoopT *Parent) {
-  BlockT *LHeader = L->getHeader();
-  assert(Parent->contains(LHeader) &&
-         "This loop should not be inserted here!");
-
-  // Check to see if it belongs in a child loop...
-  for (unsigned i = 0, e = static_cast<unsigned>(Parent->SubLoops.size());
-       i != e; ++i)
-    if (Parent->SubLoops[i]->contains(LHeader)) {
-      InsertLoopInto(L, Parent->SubLoops[i]);
-      return;
-    }
-
-  // If not, insert it here!
-  Parent->SubLoops.push_back(L);
-  L->ParentLoop = Parent;
-}
-
 //===----------------------------------------------------------------------===//
 /// Stable LoopInfo Analysis - Build a loop tree using stable iterators so the
 /// result does / not depend on use list (block predecessor) order.
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index c5e5bbe266..20c33a3d9d 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -44,10 +44,6 @@ static cl::opt<bool,true>
 VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
                 cl::desc("Verify loop info (time consuming)"));
 
-static cl::opt<bool>
-StableLoopInfo("stable-loops", cl::Hidden, cl::init(false),
-               cl::desc("Compute a stable loop tree."));
-
 char LoopInfo::ID = 0;
 INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true)
 INITIALIZE_PASS_DEPENDENCY(DominatorTree)
@@ -516,10 +512,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
 //
 bool LoopInfo::runOnFunction(Function &) {
   releaseMemory();
-  if (StableLoopInfo)
-    LI.Analyze(getAnalysis<DominatorTree>().getBase());
-  else
-    LI.Calculate(getAnalysis<DominatorTree>().getBase());    // Update
+  LI.Analyze(getAnalysis<DominatorTree>().getBase());
   return false;
 }
 
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index aba700ac5c..1540112fe1 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -162,7 +162,7 @@ void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
 // Recurse through all subloops and all loops  into LQ.
 static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
   LQ.push_back(L);
-  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+  for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I)
     addLoopIntoQueue(*I, LQ);
 }
 
@@ -183,8 +183,12 @@ bool LPPassManager::runOnFunction(Function &F) {
   // Collect inherited analysis from Module level pass manager.
   populateInheritedAnalysis(TPM->activeStack);
 
-  // Populate Loop Queue
-  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+  // Populate the loop queue in reverse program order. There is no clear need to
+  // process sibling loops in either forward or reverse order. There may be some
+  // advantage in deleting uses in a later loop before optimizing the
+  // definitions in an earlier loop. If we find a clear reason to process in
+  // forward order, then a forward variant of LoopPassManager should be created.
+  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
     addLoopIntoQueue(*I, LQ);
 
   if (LQ.empty()) // No loops, skip calling finalizers
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index b7298e48ee..9f3829e3c0 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -18,7 +18,6 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/Analysis/LoopInfoImpl.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 using namespace llvm;
 
@@ -26,10 +25,6 @@ using namespace llvm;
 template class llvm::LoopBase<MachineBasicBlock, MachineLoop>;
 template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>;
 
-static cl::opt<bool>
-StableLoopInfo("stable-machine-loops", cl::Hidden, cl::init(false),
-               cl::desc("Compute a stable loop tree."));
-
 char MachineLoopInfo::ID = 0;
 INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
                 "Machine Natural Loop Construction", true, true)
@@ -41,10 +36,7 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
 
 bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
   releaseMemory();
-  if (StableLoopInfo)
-    LI.Analyze(getAnalysis<MachineDominatorTree>().getBase());
-  else
-    LI.Calculate(getAnalysis<MachineDominatorTree>().getBase());    // Update
+  LI.Analyze(getAnalysis<MachineDominatorTree>().getBase());
   return false;
 }
 
diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll
index 7491190b01..9f7d036cf1 100644
--- a/test/CodeGen/X86/multiple-loop-post-inc.ll
+++ b/test/CodeGen/X86/multiple-loop-post-inc.ll
@@ -1,9 +1,9 @@
 ; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 -mcpu=nehalem < %s | FileCheck %s
 ; rdar://7236213
-
-; Xfailed now that scheduler 2-address hack is disabled a lea is generated.
-; The code isn't any worse though.
-; XFAIL: *
+;
+; The scheduler's 2-address hack has been disabled, so there is
+; currently no good guarantee that this test will pass until the
+; machine scheduler develops an equivalent heuristic.
 
 ; CodeGen shouldn't require any lea instructions inside the marked loop.
 ; It should properly set up post-increment uses and do coalescing for
diff --git a/test/Transforms/LCSSA/unused-phis.ll b/test/Transforms/LCSSA/unused-phis.ll
index aa2ab96341..01b214b8e3 100644
--- a/test/Transforms/LCSSA/unused-phis.ll
+++ b/test/Transforms/LCSSA/unused-phis.ll
@@ -2,9 +2,9 @@
 ; CHECK: exit1:
 ; CHECK: .lcssa =
 ; CHECK: exit2:
-; CHECK: .lcssa2 =
+; CHECK: .lcssa1 =
 ; CHECK: exit3:
-; CHECK-NOT: .lcssa1 =
+; CHECK-NOT: .lcssa
 
 ; Test to ensure that when there are multiple exit blocks, PHI nodes are
 ; only inserted by LCSSA when there is a use dominated by a given exit
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
index 8389fe4643..c1fd588106 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-SimpleSwitch.ll
@@ -35,11 +35,11 @@
 ; CHECK:      loop_begin.us1:                                   ; preds = %loop_begin.backedge.us5, %.split.split.us
 ; CHECK-NEXT:   %var_val.us2 = load i32* %var
 ; CHECK-NEXT:   switch i32 2, label %default.us-lcssa.us-lcssa.us [
-; CHECK-NEXT:     i32 1, label %inc.us3
-; CHECK-NEXT:     i32 2, label %dec.us4
+; CHECK-NEXT:     i32 1, label %inc.us4
+; CHECK-NEXT:     i32 2, label %dec.us3
 ; CHECK-NEXT:   ]
 
-; CHECK:      dec.us4:                                          ; preds = %loop_begin.us1
+; CHECK:      dec.us3:                                          ; preds = %loop_begin.us1
 ; CHECK-NEXT:   call void @decf() noreturn nounwind
 ; CHECK-NEXT:   br label %loop_begin.backedge.us5
 
@@ -81,7 +81,7 @@ inc:
 dec:
   call void @decf() noreturn nounwind
   br label %loop_begin
-default:  
+default:
   br label %loop_exit
 loop_exit:
   ret i32 0
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
index 05d98d513e..f3db471199 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches-Threshold.ll
@@ -19,15 +19,15 @@
 ; CHECK:        switch i32 1, label %second_switch.us [
 ; CHECK-NEXT:     i32 1, label %inc.us
 
-; CHECK:      inc.us:                                           ; preds = %second_switch.us, %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
-; CHECK-NEXT:   br label %loop_begin.backedge.us
-
 ; CHECK:      second_switch.us:                                 ; preds = %loop_begin.us
 ; CHECK-NEXT:   switch i32 %d, label %default.us [
 ; CHECK-NEXT:     i32 1, label %inc.us
 ; CHECK-NEXT:   ]
 
+; CHECK:      inc.us:                                           ; preds = %second_switch.us, %loop_begin.us
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us
+
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
 ; CHECK-NEXT:   br label %loop_begin
 
@@ -73,7 +73,7 @@ inc:
   call void @incf() noreturn nounwind
   br label %loop_begin
 
-default:  
+default:
   br label %loop_begin
 
 loop_exit:
diff --git a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
index 1b186d6bec..270899642f 100644
--- a/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
+++ b/test/Transforms/LoopUnswitch/2011-11-18-TwoSwitches.ll
@@ -25,14 +25,14 @@
 ; CHECK-NEXT:   switch i32 1, label %second_switch.us.us [
 ; CHECK-NEXT:     i32 1, label %inc.us.us
 
-; CHECK:      inc.us.us:                                        ; preds = %second_switch.us.us, %loop_begin.us.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
-; CHECK-NEXT:   br label %loop_begin.backedge.us.us
-
 ; CHECK:      second_switch.us.us:                              ; preds = %loop_begin.us.us
 ; CHECK-NEXT:   switch i32 1, label %default.us.us [
 ; CHECK-NEXT:     i32 1, label %inc.us.us
 
+; CHECK:      inc.us.us:                                        ; preds = %second_switch.us.us, %loop_begin.us.us
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us.us
+
 ; CHECK:      .split.us.split:                                  ; preds = %.split.us..split.us.split_crit_edge
 ; CHECK-NEXT:   br label %loop_begin.us
 
@@ -41,10 +41,6 @@
 ; CHECK-NEXT:   switch i32 1, label %second_switch.us [
 ; CHECK-NEXT:     i32 1, label %inc.us
 
-; CHECK:      inc.us:                                           ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
-; CHECK-NEXT:   call void @incf() noreturn nounwind
-; CHECK-NEXT:   br label %loop_begin.backedge.us
-
 ; CHECK:      second_switch.us:                                 ; preds = %loop_begin.us
 ; CHECK-NEXT:   switch i32 %d, label %default.us [
 ; CHECK-NEXT:     i32 1, label %second_switch.us.inc.us_crit_edge
@@ -53,6 +49,10 @@
 ; CHECK:      second_switch.us.inc.us_crit_edge:                ; preds = %second_switch.us
 ; CHECK-NEXT:   br i1 true, label %us-unreachable8, label %inc.us
 
+; CHECK:      inc.us:                                           ; preds = %second_switch.us.inc.us_crit_edge, %loop_begin.us
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us
+
 ; CHECK:      .split:                                           ; preds = %..split_crit_edge
 ; CHECK-NEXT:   %3 = icmp eq i32 %d, 1
 ; CHECK-NEXT:   br i1 %3, label %.split.split.us, label %.split..split.split_crit_edge
@@ -65,21 +65,21 @@
 
 ; CHECK:      loop_begin.us1:                                   ; preds = %loop_begin.backedge.us6, %.split.split.us
 ; CHECK-NEXT:   %var_val.us2 = load i32* %var
-; CHECK-NEXT:   switch i32 %c, label %second_switch.us4 [
+; CHECK-NEXT:   switch i32 %c, label %second_switch.us3 [
 ; CHECK-NEXT:     i32 1, label %loop_begin.inc_crit_edge.us
 ; CHECK-NEXT:   ]
 
-; CHECK:      inc.us3:                                          ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us4
-; CHECK-NEXT:   call void @incf() noreturn nounwind
-; CHECK-NEXT:   br label %loop_begin.backedge.us6
-
-; CHECK:      second_switch.us4:                                ; preds = %loop_begin.us1
+; CHECK:      second_switch.us3:                                ; preds = %loop_begin.us1
 ; CHECK-NEXT:   switch i32 1, label %default.us5 [
-; CHECK-NEXT:     i32 1, label %inc.us3
+; CHECK-NEXT:     i32 1, label %inc.us4
 ; CHECK-NEXT:   ]
 
+; CHECK:      inc.us4:                                          ; preds = %loop_begin.inc_crit_edge.us, %second_switch.us3
+; CHECK-NEXT:   call void @incf() noreturn nounwind
+; CHECK-NEXT:   br label %loop_begin.backedge.us6
+
 ; CHECK:      loop_begin.inc_crit_edge.us:                      ; preds = %loop_begin.us1
-; CHECK-NEXT:   br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us3
+; CHECK-NEXT:   br i1 true, label %us-unreachable.us-lcssa.us, label %inc.us4
 
 ; CHECK:      .split.split:                                     ; preds = %.split..split.split_crit_edge
 ; CHECK-NEXT:   br label %loop_begin
@@ -127,7 +127,7 @@ inc:
   call void @incf() noreturn nounwind
   br label %loop_begin
 
-default:  
+default:
   br label %loop_begin
 
 loop_exit:
-- 
cgit v1.2.3-70-g09d2