Fix some register-alias-related bugs in the post-RA scheduler liveness

computation code. Also, avoid adding output-depenency edges when both defs are dead, which frequently happens with EFLAGS defs. Compute Depth and Height lazily, and always in terms of edge latency values. For the schedulers that don't care about latency, edge latencies are set to 1. Eliminate Cycle and CycleBound, and LatencyPriorityQueue's Latencies array. These are all subsumed by the Depth and Height fields. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61073 91177308-0d34-0410-b5e6-96231b3b80d8
author: Dan Gohman <gohman@apple.com> 2008-12-16 03:25:46 +0000
committer: Dan Gohman <gohman@apple.com> 2008-12-16 03:25:46 +0000
commit: 3f23744df4809eba94284e601e81489212c974d4 (patch)
tree: bf2d1d7711a774bac89352554c163f167abf36bb /lib/CodeGen/PostRASchedulerList.cpp
parent: 64722e5163785da17ab581364c9655071b566180 (diff)
1 files changed, 61 insertions, 37 deletions
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 2f7c0118a0..0fc6c3370b 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -23,7 +23,9 @@
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/LatencyPriorityQueue.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -49,6 +51,14 @@ namespace {
     static char ID;
     PostRAScheduler() : MachineFunctionPass(&ID) {}
 
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
     const char *getPassName() const {
       return "Post RA top-down list latency scheduler";
     }
@@ -72,8 +82,10 @@ namespace {
     ScheduleDAGTopologicalSort Topo;
 
   public:
-    SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm)
-      : ScheduleDAGInstrs(mbb, tm), Topo(SUnits) {}
+    SchedulePostRATDList(MachineBasicBlock *mbb, const TargetMachine &tm,
+                         const MachineLoopInfo &MLI,
+                         const MachineDominatorTree &MDT)
+      : ScheduleDAGInstrs(mbb, tm, MLI, MDT), Topo(SUnits) {}
 
     void Schedule();
 
@@ -88,11 +100,14 @@ namespace {
 bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   DOUT << "PostRAScheduler\n";
 
+  const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+  const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+
   // Loop over all of the basic blocks
   for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
        MBB != MBBe; ++MBB) {
 
-    SchedulePostRATDList Scheduler(MBB, Fn.getTarget());
+    SchedulePostRATDList Scheduler(MBB, Fn.getTarget(), MLI, MDT);
 
     Scheduler.Run();
 
@@ -142,6 +157,28 @@ getInstrOperandRegClass(const TargetRegisterInfo *TRI,
   return TRI->getRegClass(II.OpInfo[Op].RegClass);
 }
 
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static SDep *CriticalPathStep(SUnit *SU) {
+  SDep *Next = 0;
+  unsigned NextDepth = 0;
+  // Find the predecessor edge with the greatest depth.
+  for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+       P != PE; ++P) {
+    SUnit *PredSU = P->getSUnit();
+    unsigned PredLatency = P->getLatency();
+    unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+    // In the case of a latency tie, prefer an anti-dependency edge over
+    // other types of edges.
+    if (NextDepth < PredTotalLatency ||
+        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+      NextDepth = PredTotalLatency;
+      Next = &*P;
+    }
+  }
+  return Next;
+}
+
 /// BreakAntiDependencies - Identifiy anti-dependencies along the critical path
 /// of the ScheduleDAG and break them by renaming registers.
 ///
@@ -150,34 +187,16 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
   // so just duck out immediately if the block is empty.
   if (BB->empty()) return false;
 
-  Topo.InitDAGTopologicalSorting();
-
-  // Compute a critical path for the DAG.
+  // Find the node at the bottom of the critical path.
   SUnit *Max = 0;
-  std::vector<SDep *> CriticalPath(SUnits.size());
-  for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
-       E = Topo.end(); I != E; ++I) {
-    SUnit *SU = &SUnits[*I];
-    for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
-         P != PE; ++P) {
-      SUnit *PredSU = P->getSUnit();
-      // This assumes that there's no delay for reusing registers.
-      unsigned PredLatency = P->getLatency();
-      unsigned PredTotalLatency = PredSU->CycleBound + PredLatency;
-      if (SU->CycleBound < PredTotalLatency ||
-          (SU->CycleBound == PredTotalLatency &&
-           P->getKind() == SDep::Anti)) {
-        SU->CycleBound = PredTotalLatency;
-        CriticalPath[*I] = &*P;
-      }
-    }
-    // Keep track of the node at the end of the critical path.
-    if (!Max || SU->CycleBound + SU->Latency > Max->CycleBound + Max->Latency)
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    SUnit *SU = &SUnits[i];
+    if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
       Max = SU;
   }
 
   DOUT << "Critical path has total latency "
-       << (Max ? Max->CycleBound + Max->Latency : 0) << "\n";
+       << (Max ? Max->getDepth() + Max->Latency : 0) << "\n";
 
   // Walk the critical path from the bottom up. Collect all anti-dependence
   // edges on the critical path. Skip anti-dependencies between SUnits that
@@ -195,9 +214,9 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
   // the anti-dependencies in an instruction in order to be effective.
   BitVector AllocatableSet = TRI->getAllocatableSet(*MF);
   DenseMap<MachineInstr *, unsigned> CriticalAntiDeps;
-  for (SUnit *SU = Max; CriticalPath[SU->NodeNum];
-       SU = CriticalPath[SU->NodeNum]->getSUnit()) {
-    SDep *Edge = CriticalPath[SU->NodeNum];
+  SUnit *SU = Max;
+  for (SDep *Edge = CriticalPathStep(SU); Edge;
+       Edge = CriticalPathStep(SU = Edge->getSUnit())) {
     SUnit *NextSU = Edge->getSUnit();
     // Only consider anti-dependence edges.
     if (Edge->getKind() != SDep::Anti)
@@ -494,6 +513,11 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
         Classes[SubregReg] = 0;
         RegRefs.erase(SubregReg);
       }
+      for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+           *Super; ++Super) {
+        unsigned SuperReg = *Super;
+        Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      }
     }
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
       MachineOperand &MO = MI->getOperand(i);
@@ -556,8 +580,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
   // Compute how many cycles it will be before this actually becomes
   // available.  This is the max of the start time of all predecessors plus
   // their latencies.
-  unsigned PredDoneCycle = SU->Cycle + SuccEdge->getLatency();
-  SuccSU->CycleBound = std::max(SuccSU->CycleBound, PredDoneCycle);
+  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
   
   if (SuccSU->NumPredsLeft == 0) {
     PendingQueue.push_back(SuccSU);
@@ -572,7 +595,8 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
   DEBUG(SU->dump(this));
   
   Sequence.push_back(SU);
-  SU->Cycle = CurCycle;
+  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
+  SU->setDepthToAtLeast(CurCycle);
 
   // Top down: release successors.
   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -603,21 +627,21 @@ void SchedulePostRATDList::ListScheduleTopDown() {
   while (!AvailableQueue.empty() || !PendingQueue.empty()) {
     // Check to see if any of the pending instructions are ready to issue.  If
     // so, add them to the available queue.
+    unsigned MinDepth = ~0u;
     for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
-      if (PendingQueue[i]->CycleBound == CurCycle) {
+      if (PendingQueue[i]->getDepth() <= CurCycle) {
         AvailableQueue.push(PendingQueue[i]);
         PendingQueue[i]->isAvailable = true;
         PendingQueue[i] = PendingQueue.back();
         PendingQueue.pop_back();
         --i; --e;
-      } else {
-        assert(PendingQueue[i]->CycleBound > CurCycle && "Non-positive latency?");
-      }
+      } else if (PendingQueue[i]->getDepth() < MinDepth)
+        MinDepth = PendingQueue[i]->getDepth();
     }
     
     // If there are no instructions available, don't try to issue anything.
     if (AvailableQueue.empty()) {
-      ++CurCycle;
+      CurCycle = MinDepth != ~0u ? MinDepth : CurCycle + 1;
       continue;
     }
author	Dan Gohman <gohman@apple.com>	2008-12-16 03:25:46 +0000
committer	Dan Gohman <gohman@apple.com>	2008-12-16 03:25:46 +0000
commit	3f23744df4809eba94284e601e81489212c974d4 (patch)
tree	bf2d1d7711a774bac89352554c163f167abf36bb /lib/CodeGen/PostRASchedulerList.cpp
parent	64722e5163785da17ab581364c9655071b566180 (diff)