From 071966f6bf2a535b318995cfa5d0f5b641fb4e14 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Tue, 18 Dec 2012 20:52:49 +0000
Subject: MISched: minor improvement, initialize remaining resources before the
 first scheduling decision.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170449 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index c7afa08fcd..d49eda24a7 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1166,6 +1166,16 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
       RemainingCounts[PIdx] += (Factor * PI->Cycles);
     }
   }
+  for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds();
+       PIdx != PEnd; ++PIdx) {
+    if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx])
+        >= (int)SchedModel->getLatencyFactor()) {
+      CritResIdx = PIdx;
+    }
+  }
+  MaxRemainingCount = std::max(
+    RemainingMicroOps * SchedModel->getMicroOpFactor(),
+    RemainingCounts[CritResIdx]);
 }
 
 void ConvergingScheduler::SchedBoundary::
-- 
cgit v1.2.3-70-g09d2


From d453960f92993600c56e5b4c004cadca9d4f7dc8 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Tue, 18 Dec 2012 20:52:52 +0000
Subject: MISched: cleanup, use the proper iterator type.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170450 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index d49eda24a7..fbbae3877a 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1213,7 +1213,7 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) {
   if (SU->isScheduled)
     return;
 
-  for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
     unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
     unsigned MinLatency = I->getMinLatency();
-- 
cgit v1.2.3-70-g09d2


From 9c676c2941343aa5c3f933eb0dd97b22cca3d6f4 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Tue, 18 Dec 2012 20:52:54 +0000
Subject: MISched: Remove SchedRemainder::IsResourceLimited. I don't know how
 to compute it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170451 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index fbbae3877a..df3bec12f9 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -953,8 +953,6 @@ public:
     unsigned CritResIdx;
     // Number of micro-ops left to schedule.
     unsigned RemainingMicroOps;
-    // Is the unscheduled zone resource limited.
-    bool IsResourceLimited;
 
     unsigned MaxRemainingCount;
 
@@ -963,7 +961,6 @@ public:
       RemainingCounts.clear();
       CritResIdx = 0;
       RemainingMicroOps = 0;
-      IsResourceLimited = false;
       MaxRemainingCount = 0;
     }
 
-- 
cgit v1.2.3-70-g09d2


From 44fd0bcc40e04cf9aaaf0f33744ae4decbb656c3 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Tue, 18 Dec 2012 20:52:56 +0000
Subject: MISched: Heuristics, compare latency more precisely. It matters more
 for some targets.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170452 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 81 +++++++++++++++++++---------------------
 1 file changed, 38 insertions(+), 43 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index df3bec12f9..4536059799 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -954,19 +954,25 @@ public:
     // Number of micro-ops left to schedule.
     unsigned RemainingMicroOps;
 
-    unsigned MaxRemainingCount;
-
     void reset() {
       CriticalPath = 0;
       RemainingCounts.clear();
       CritResIdx = 0;
       RemainingMicroOps = 0;
-      MaxRemainingCount = 0;
     }
 
     SchedRemainder() { reset(); }
 
     void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
+
+    unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const {
+      if (!SchedModel->hasInstrSchedModel())
+        return 0;
+
+      return std::max(
+        RemainingMicroOps * SchedModel->getMicroOpFactor(),
+        RemainingCounts[CritResIdx]);
+    }
   };
 
   /// Each Scheduling boundary is associated with ready queues. It tracks the
@@ -1007,9 +1013,6 @@ public:
 
     unsigned ExpectedCount;
 
-    // Policy flag: attempt to find ILP until expected latency is covered.
-    bool ShouldIncreaseILP;
-
 #ifndef NDEBUG
     // Remember the greatest min operand latency.
     unsigned MaxMinLatency;
@@ -1030,7 +1033,6 @@ public:
       CritResIdx = 0;
       IsResourceLimited = false;
       ExpectedCount = 0;
-      ShouldIncreaseILP = false;
 #ifndef NDEBUG
       MaxMinLatency = 0;
 #endif
@@ -1058,7 +1060,7 @@ public:
     unsigned getUnscheduledLatency(SUnit *SU) const {
       if (isTop())
         return SU->getHeight();
-      return SU->getDepth();
+      return SU->getDepth() + SU->Latency;
     }
 
     unsigned getCriticalCount() const {
@@ -1067,7 +1069,7 @@ public:
 
     bool checkHazard(SUnit *SU);
 
-    void checkILPPolicy();
+    void setLatencyPolicy(CandPolicy &Policy);
 
     void releaseNode(SUnit *SU, unsigned ReadyCycle);
 
@@ -1170,9 +1172,6 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
       CritResIdx = PIdx;
     }
   }
-  MaxRemainingCount = std::max(
-    RemainingMicroOps * SchedModel->getMicroOpFactor(),
-    RemainingCounts[CritResIdx]);
 }
 
 void ConvergingScheduler::SchedBoundary::
@@ -1281,12 +1280,27 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
   return false;
 }
 
-/// If expected latency is covered, disable ILP policy.
-void ConvergingScheduler::SchedBoundary::checkILPPolicy() {
-  if (ShouldIncreaseILP
-      && (IsResourceLimited || ExpectedLatency <= CurrCycle)) {
-    ShouldIncreaseILP = false;
-    DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n');
+/// Compute the remaining latency to determine whether ILP should be increased.
+void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
+  // FIXME: compile time. In all, we visit four queues here one we should only
+  // need to visit the one that was last popped if we cache the result.
+  unsigned RemLatency = 0;
+  for (ReadyQueue::iterator I = Available.begin(), E = Available.end();
+       I != E; ++I) {
+    unsigned L = getUnscheduledLatency(*I);
+    if (L > RemLatency)
+      RemLatency = L;
+  }
+  for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end();
+       I != E; ++I) {
+    unsigned L = getUnscheduledLatency(*I);
+    if (L > RemLatency)
+      RemLatency = L;
+  }
+  if (RemLatency + ExpectedLatency >= Rem->CriticalPath + ILPWindow
+      && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
+    Policy.ReduceLatency = true;
+    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
   }
 }
 
@@ -1305,15 +1319,6 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
 
   // Record this node as an immediate dependent of the scheduled node.
   NextSUs.insert(SU);
-
-  // If CriticalPath has been computed, then check if the unscheduled nodes
-  // exceed the ILP window. Before registerRoots, CriticalPath==0.
-  if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU)
-                            > Rem->CriticalPath + ILPWindow)) {
-    ShouldIncreaseILP = true;
-    DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " "
-          << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n');
-  }
 }
 
 /// Move the boundary of scheduled code by one cycle.
@@ -1361,9 +1366,6 @@ void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx,
   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
   Rem->RemainingCounts[PIdx] -= Count;
 
-  // Reset MaxRemainingCount for sanity.
-  Rem->MaxRemainingCount = 0;
-
   // Check if this resource exceeds the current critical resource by a full
   // cycle. If so, it becomes the critical resource.
   if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx])
@@ -1495,9 +1497,7 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
 /// resources.
 ///
 /// If the CriticalZone is latency limited, don't force a policy for the
-/// candidates here. Instead, When releasing each candidate, releaseNode
-/// compares the region's critical path to the candidate's height or depth and
-/// the scheduled zone's expected latency then sets ShouldIncreaseILP.
+/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
 void ConvergingScheduler::balanceZones(
   ConvergingScheduler::SchedBoundary &CriticalZone,
   ConvergingScheduler::SchedCandidate &CriticalCand,
@@ -1506,6 +1506,7 @@ void ConvergingScheduler::balanceZones(
 
   if (!CriticalZone.IsResourceLimited)
     return;
+  assert(SchedModel->hasInstrSchedModel() && "required schedmodel");
 
   SchedRemainder *Rem = CriticalZone.Rem;
 
@@ -1513,7 +1514,7 @@ void ConvergingScheduler::balanceZones(
   // remainder, try to reduce it.
   unsigned RemainingCritCount =
     Rem->RemainingCounts[CriticalZone.CritResIdx];
-  if ((int)(Rem->MaxRemainingCount - RemainingCritCount)
+  if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount)
       > (int)SchedModel->getLatencyFactor()) {
     CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx;
     DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce "
@@ -1539,12 +1540,9 @@ void ConvergingScheduler::checkResourceLimits(
   ConvergingScheduler::SchedCandidate &TopCand,
   ConvergingScheduler::SchedCandidate &BotCand) {
 
-  Bot.checkILPPolicy();
-  Top.checkILPPolicy();
-  if (Bot.ShouldIncreaseILP)
-    BotCand.Policy.ReduceLatency = true;
-  if (Top.ShouldIncreaseILP)
-    TopCand.Policy.ReduceLatency = true;
+  // Set ReduceLatency to true if needed.
+  Bot.setLatencyPolicy(TopCand.Policy);
+  Top.setLatencyPolicy(BotCand.Policy);
 
   // Handle resource-limited regions.
   if (Top.IsResourceLimited && Bot.IsResourceLimited
@@ -1579,9 +1577,6 @@ void ConvergingScheduler::checkResourceLimits(
   // The critical resource is different in each zone, so request balancing.
 
   // Compute the cost of each zone.
-  Rem.MaxRemainingCount = std::max(
-    Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(),
-    Rem.RemainingCounts[Rem.CritResIdx]);
   Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle);
   Top.ExpectedCount = std::max(
     Top.getCriticalCount(),
-- 
cgit v1.2.3-70-g09d2


From e3eddaec4a173c7b9c56aff455d74e4fb2f95daf Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Tue, 18 Dec 2012 20:52:58 +0000
Subject: MISched: Cleanup, redundant statement.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170453 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 1 -
 1 file changed, 1 deletion(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 4536059799..117b2bdccf 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1898,7 +1898,6 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
       Cand.setBest(TryCand);
       DEBUG(traceCandidate(Cand, Zone));
     }
-    TryCand.SU = *I;
   }
 }
 
-- 
cgit v1.2.3-70-g09d2


From 47579cf390c42e0577519e0a2b6044baece9df00 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Wed, 9 Jan 2013 03:36:49 +0000
Subject: MIsched: add an ILP window property to machine model.

This was an experimental option, but needs to be defined
per-target. e.g. PPC A2 needs to aggressively hide latency.

I converted some in-order scheduling tests to A2. Hal is working on
more test cases.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@171946 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/TargetSchedule.h           |  3 ++
 include/llvm/MC/MCSchedule.h                    | 19 +++++++--
 include/llvm/Target/TargetSchedule.td           |  1 +
 lib/CodeGen/MachineScheduler.cpp                | 12 +-----
 lib/Target/ARM/ARMScheduleA9.td                 |  3 ++
 lib/Target/X86/X86Schedule.td                   |  5 +++
 lib/Target/X86/X86ScheduleAtom.td               |  1 +
 test/CodeGen/ARM/misched-inorder-latency.ll     | 48 ---------------------
 test/CodeGen/PowerPC/misched-inorder-latency.ll | 55 +++++++++++++++++++++++++
 utils/TableGen/SubtargetEmitter.cpp             |  1 +
 10 files changed, 86 insertions(+), 62 deletions(-)
 delete mode 100644 test/CodeGen/ARM/misched-inorder-latency.ll
 create mode 100644 test/CodeGen/PowerPC/misched-inorder-latency.ll

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h
index 4c4a2a8b95..484d7e200a 100644
--- a/include/llvm/CodeGen/TargetSchedule.h
+++ b/include/llvm/CodeGen/TargetSchedule.h
@@ -84,6 +84,9 @@ public:
   /// \brief Maximum number of micro-ops that may be scheduled per cycle.
   unsigned getIssueWidth() const { return SchedModel.IssueWidth; }
 
+  /// \brief Number of cycles the OOO processor is expected to hide.
+  unsigned getILPWindow() const { return SchedModel.ILPWindow; }
+
   /// \brief Return the number of issue slots required for this MI.
   unsigned getNumMicroOps(const MachineInstr *MI,
                           const MCSchedClassDesc *SC = 0) const;
diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h
index 0c71ee5135..9e9474952a 100644
--- a/include/llvm/MC/MCSchedule.h
+++ b/include/llvm/MC/MCSchedule.h
@@ -155,7 +155,7 @@ public:
   //      Optional InstrItinerary OperandCycles provides expected latency.
   //      TODO: can't yet specify both min and expected latency per operand.
   int MinLatency;
-  static const unsigned DefaultMinLatency = -1;
+  static const int DefaultMinLatency = -1;
 
   // LoadLatency is the expected latency of load instructions.
   //
@@ -172,6 +172,16 @@ public:
   unsigned HighLatency;
   static const unsigned DefaultHighLatency = 10;
 
+  // ILPWindow is the number of cycles that the scheduler effectively ignores
+  // before attempting to hide latency. This should be zero for in-order cpus to
+  // always hide expected latency. For out-of-order cpus, it may be tweaked as
+  // desired to roughly approximate instruction buffers. The actual threshold is
+  // not very important for an OOO processor, as long as it isn't too high. A
+  // nonzero value helps avoid rescheduling to hide latency when its is fairly
+  // obviously useless and makes register pressure heuristics more effective.
+  unsigned ILPWindow;
+  static const unsigned DefaultILPWindow = 0;
+
   // MispredictPenalty is the typical number of extra cycles the processor
   // takes to recover from a branch misprediction.
   unsigned MispredictPenalty;
@@ -196,6 +206,7 @@ public:
                   MinLatency(DefaultMinLatency),
                   LoadLatency(DefaultLoadLatency),
                   HighLatency(DefaultHighLatency),
+                  ILPWindow(DefaultILPWindow),
                   MispredictPenalty(DefaultMispredictPenalty),
                   ProcID(0), ProcResourceTable(0), SchedClassTable(0),
                   NumProcResourceKinds(0), NumSchedClasses(0),
@@ -205,12 +216,12 @@ public:
   }
 
   // Table-gen driven ctor.
-  MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned mp,
-               unsigned pi, const MCProcResourceDesc *pr,
+  MCSchedModel(unsigned iw, int ml, unsigned ll, unsigned hl, unsigned ilp,
+               unsigned mp, unsigned pi, const MCProcResourceDesc *pr,
                const MCSchedClassDesc *sc, unsigned npr, unsigned nsc,
                const InstrItinerary *ii):
     IssueWidth(iw), MinLatency(ml), LoadLatency(ll), HighLatency(hl),
-    MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr),
+    ILPWindow(ilp), MispredictPenalty(mp), ProcID(pi), ProcResourceTable(pr),
     SchedClassTable(sc), NumProcResourceKinds(npr), NumSchedClasses(nsc),
     InstrItineraries(ii) {}
 
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td
index 0da82fdd89..b7920bae8a 100644
--- a/include/llvm/Target/TargetSchedule.td
+++ b/include/llvm/Target/TargetSchedule.td
@@ -76,6 +76,7 @@ class SchedMachineModel {
   int IssueWidth = -1; // Max micro-ops that may be scheduled per cycle.
   int MinLatency = -1; // Determines which instrucions are allowed in a group.
                        // (-1) inorder (0) ooo, (1): inorder +var latencies.
+  int ILPWindow = -1;  // Cycles of latency likely hidden by hardware buffers.
   int LoadLatency = -1; // Cycles for loads to access the cache.
   int HighLatency = -1; // Approximation of cycles for "high latency" ops.
   int MispredictPenalty = -1; // Extra cycles for a mispredicted branch.
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 117b2bdccf..a32df7805b 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -48,15 +48,6 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
 static bool ViewMISchedDAGs = false;
 #endif // NDEBUG
 
-// Threshold to very roughly model an out-of-order processor's instruction
-// buffers. If the actual value of this threshold matters much in practice, then
-// it can be specified by the machine model. For now, it's an experimental
-// tuning knob to determine when and if it matters.
-static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden,
-  cl::desc("Allow expected latency to exceed the critical path by N cycles "
-           "before attempting to balance ILP"),
-  cl::init(10U));
-
 // Experimental heuristics
 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
   cl::desc("Enable load clustering."), cl::init(true));
@@ -1297,7 +1288,8 @@ void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) {
     if (L > RemLatency)
       RemLatency = L;
   }
-  if (RemLatency + ExpectedLatency >= Rem->CriticalPath + ILPWindow
+  unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow();
+  if (RemLatency + ExpectedLatency >= CriticalPathLimit
       && RemLatency > Rem->getMaxRemainingCount(SchedModel)) {
     Policy.ReduceLatency = true;
     DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n');
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 404634fee9..4191931a5a 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1887,6 +1887,9 @@ def CortexA9Model : SchedMachineModel {
   let LoadLatency = 2; // Optimistic load latency assuming bypass.
                        // This is overriden by OperandCycles if the
                        // Itineraries are queried instead.
+  let ILPWindow = 10; // Don't reschedule small blocks to hide
+                      // latency. Minimum latency requirements are already
+                      // modeled strictly by reserving resources.
   let MispredictPenalty = 8; // Based on estimate of pipeline depth.
 
   let Itineraries = CortexA9Itineraries;
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index c14407f9ac..d99d085298 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -470,12 +470,17 @@ def IIC_NOP : InstrItinClass;
 // latencies. Since these latencies are not used for pipeline hazards,
 // they do not need to be exact.
 //
+// ILPWindow=10 is an arbitrary threshold that approximates cycles of
+// latency hidden by instruction buffers. The actual value is not very
+// important but should be zero for inorder and nonzero for OOO processors.
+//
 // The GenericModel contains no instruciton itineraries.
 def GenericModel : SchedMachineModel {
   let IssueWidth = 4;
   let MinLatency = 0;
   let LoadLatency = 4;
   let HighLatency = 10;
+  let ILPWindow = 10;
 }
 
 include "X86ScheduleAtom.td"
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 87102614cc..1e5f2d6c9a 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -525,6 +525,7 @@ def AtomModel : SchedMachineModel {
                        // OperandCycles may be used for expected latency.
   let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles.
   let HighLatency = 30;// Expected, may be overriden by OperandCycles.
+  let ILPWindow = 0; // Always try to hide expected latency.
 
   let Itineraries = AtomItineraries;
 }
diff --git a/test/CodeGen/ARM/misched-inorder-latency.ll b/test/CodeGen/ARM/misched-inorder-latency.ll
deleted file mode 100644
index 8c06b4ce6e..0000000000
--- a/test/CodeGen/ARM/misched-inorder-latency.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; RUN: llc < %s -enable-misched -march=thumb -mcpu=swift \
-; RUN:          -pre-RA-sched=source -scheditins=false -ilp-window=0 \
-; RUN:          -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s
-;
-; For these tests, we set -ilp-window=0 to simulate in order processor.
-
-; %val1 is a 3-cycle load live out of %entry. It should be hoisted
-; above the add.
-; CHECK: @testload
-; CHECK: %entry
-; CHECK: ldr
-; CHECK: adds
-; CHECK: bne
-; CHECK: %true
-define i32 @testload(i32 *%ptr, i32 %sumin) {
-entry:
-  %sum1 = add i32 %sumin, 1
-  %val1 = load i32* %ptr
-  %p = icmp eq i32 %sumin, 0
-  br i1 %p, label %true, label %end
-true:
-  %sum2 = add i32 %sum1, 1
-  %ptr2 = getelementptr i32* %ptr, i32 1
-  %val = load i32* %ptr2
-  %val2 = add i32 %val1, %val
-  br label %end
-end:
-  %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ]
-  %summerge = phi i32 [ %sum1, %entry], [ %sum2, %true ]
-  %sumout = add i32 %valmerge, %summerge
-  ret i32 %sumout
-}
-
-; The prefetch gets a default latency of 3 cycles and should be hoisted
-; above the add.
-;
-; CHECK: @testprefetch
-; CHECK: %entry
-; CHECK: pld
-; CHECK: adds
-; CHECK: bx
-define i32 @testprefetch(i8 *%ptr, i32 %i) {
-entry:
-  %tmp = add i32 %i, 1
-  tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
-  ret i32 %tmp
-}
-declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
diff --git a/test/CodeGen/PowerPC/misched-inorder-latency.ll b/test/CodeGen/PowerPC/misched-inorder-latency.ll
new file mode 100644
index 0000000000..8fae7ad4d1
--- /dev/null
+++ b/test/CodeGen/PowerPC/misched-inorder-latency.ll
@@ -0,0 +1,55 @@
+; RUN: llc < %s -enable-misched -pre-RA-sched=source -scheditins=false \
+; RUN:          -disable-ifcvt-triangle-false -disable-post-ra | FileCheck %s
+;
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+; %val1 is a load live out of %entry. It should be hoisted
+; above the add.
+; CHECK: testload:
+; CHECK: %entry
+; CHECK: lwz
+; CHECK: addi
+; CHECK: bne
+; CHECK: %true
+define i32 @testload(i32 *%ptr, i32 %sumin) {
+entry:
+  %sum1 = add i32 %sumin, 1
+  %val1 = load i32* %ptr
+  %p = icmp eq i32 %sumin, 0
+  br i1 %p, label %true, label %end
+true:
+  %sum2 = add i32 %sum1, 1
+  %ptr2 = getelementptr i32* %ptr, i32 1
+  %val = load i32* %ptr2
+  %val2 = add i32 %val1, %val
+  br label %end
+end:
+  %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ]
+  %summerge = phi i32 [ %sum1, %entry], [ %sum2, %true ]
+  %sumout = add i32 %valmerge, %summerge
+  ret i32 %sumout
+}
+
+; The prefetch gets a default latency of 3 cycles and should be hoisted
+; above the add.
+;
+; CHECK: testprefetch:
+; CHECK: %entry
+; CHECK: dcbt
+; CHECK: addi
+; CHECK: blr
+define i32 @testprefetch(i8 *%ptr, i32 %i) {
+entry:
+  %val1 = add i32 %i, 1
+  tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
+  %p = icmp eq i32 %i, 0
+  br i1 %p, label %true, label %end
+true:
+  %val2 = add i32 %val1, 1
+  br label %end
+end:
+  %valmerge = phi i32 [ %val1, %entry], [ %val2, %true ]
+  ret i32 %valmerge
+}
+declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 3b7d006fd1..fc8d00dd83 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -1108,6 +1108,7 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
     EmitProcessorProp(OS, PI->ModelDef, "MinLatency", ',');
     EmitProcessorProp(OS, PI->ModelDef, "LoadLatency", ',');
     EmitProcessorProp(OS, PI->ModelDef, "HighLatency", ',');
+    EmitProcessorProp(OS, PI->ModelDef, "ILPWindow", ',');
     EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ',');
     OS << "  " << PI->Index << ", // Processor ID\n";
     if (PI->hasInstrSchedModel())
-- 
cgit v1.2.3-70-g09d2


From e9ccacd376b2a271434e739f0b7d468cc691723b Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Fri, 11 Jan 2013 17:46:50 +0000
Subject: Fix typo from r170452. Affects -enable-misched heuristics.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172223 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index a32df7805b..b965c7f997 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1533,8 +1533,8 @@ void ConvergingScheduler::checkResourceLimits(
   ConvergingScheduler::SchedCandidate &BotCand) {
 
   // Set ReduceLatency to true if needed.
-  Bot.setLatencyPolicy(TopCand.Policy);
-  Top.setLatencyPolicy(BotCand.Policy);
+  Bot.setLatencyPolicy(BopCand.Policy);
+  Top.setLatencyPolicy(TotCand.Policy);
 
   // Handle resource-limited regions.
   if (Top.IsResourceLimited && Bot.IsResourceLimited
-- 
cgit v1.2.3-70-g09d2


From eed4e0193ff04ba27bfb6d0d4201505f03d99a7c Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Fri, 11 Jan 2013 17:51:16 +0000
Subject: Follow-up typo correction from building the wrong branch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@172224 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index b965c7f997..c949266b8b 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1533,8 +1533,8 @@ void ConvergingScheduler::checkResourceLimits(
   ConvergingScheduler::SchedCandidate &BotCand) {
 
   // Set ReduceLatency to true if needed.
-  Bot.setLatencyPolicy(BopCand.Policy);
-  Top.setLatencyPolicy(TotCand.Policy);
+  Bot.setLatencyPolicy(BotCand.Policy);
+  Top.setLatencyPolicy(TopCand.Policy);
 
   // Handle resource-limited regions.
   if (Top.IsResourceLimited && Bot.IsResourceLimited
-- 
cgit v1.2.3-70-g09d2


From db4170697f866dc8620946c77828ef0804996c3d Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Thu, 24 Jan 2013 02:09:57 +0000
Subject: MachineScheduler: enable biasCriticalPath for all DAGs.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173318 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index c949266b8b..b9198e8fc6 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -563,6 +563,10 @@ void ScheduleDAGMI::releaseRoots() {
   for (std::vector<SUnit>::iterator
          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
     SUnit *SU = &(*I);
+
+    // Order predecessors so DFSResult follows the critical path.
+    SU->biasCriticalPath();
+
     // A SUnit is ready to top schedule if it has no predecessors.
     if (!I->NumPredsLeft && SU != &EntrySU)
       SchedImpl->releaseTopNode(SU);
-- 
cgit v1.2.3-70-g09d2


From 178f7d08a41f2e9432b96cd27f0c8ea42fa0ac9e Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Fri, 25 Jan 2013 04:01:04 +0000
Subject: MISched: Add SchedDFSResult to ScheduleDAGMI to formalize the
 interface and allow other strategies to select it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173413 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/MachineScheduler.h | 30 ++++++++++---
 lib/CodeGen/MachineScheduler.cpp        | 80 ++++++++++++++++++++++-----------
 test/CodeGen/X86/misched-matrix.ll      |  3 ++
 3 files changed, 83 insertions(+), 30 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 68489884dc..cc697a50ab 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -43,6 +43,7 @@ class MachineDominatorTree;
 class MachineLoopInfo;
 class RegisterClassInfo;
 class ScheduleDAGInstrs;
+class SchedDFSResult;
 
 /// MachineSchedContext provides enough context from the MachineScheduler pass
 /// for the target to instantiate a scheduler.
@@ -119,6 +120,9 @@ public:
   /// be scheduled at the bottom.
   virtual SUnit *pickNode(bool &IsTopNode) = 0;
 
+  /// \brief Scheduler callback to notify that a new subtree is scheduled.
+  virtual void scheduleTree(unsigned SubtreeID) {}
+
   /// Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an
   /// instruction and updated scheduled/remaining flags in the DAG nodes.
   virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
@@ -164,6 +168,8 @@ public:
 
   iterator end() { return Queue.end(); }
 
+  ArrayRef<SUnit*> elements() { return Queue; }
+
   iterator find(SUnit *SU) {
     return std::find(Queue.begin(), Queue.end(), SU);
   }
@@ -202,6 +208,11 @@ protected:
   RegisterClassInfo *RegClassInfo;
   MachineSchedStrategy *SchedImpl;
 
+  /// Information about DAG subtrees. If DFSResult is NULL, then SchedulerTrees
+  /// will be empty.
+  SchedDFSResult *DFSResult;
+  BitVector ScheduledTrees;
+
   /// Topo - A topological ordering for SUnits which permits fast IsReachable
   /// and similar queries.
   ScheduleDAGTopologicalSort Topo;
@@ -243,7 +254,7 @@ protected:
 public:
   ScheduleDAGMI(MachineSchedContext *C, MachineSchedStrategy *S):
     ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
-    AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S),
+    AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S), DFSResult(0),
     Topo(SUnits, &ExitSU), RPTracker(RegPressure), CurrentTop(),
     TopRPTracker(TopPressure), CurrentBottom(), BotRPTracker(BotPressure),
     NextClusterPred(NULL), NextClusterSucc(NULL) {
@@ -252,10 +263,7 @@ public:
 #endif
   }
 
-  virtual ~ScheduleDAGMI() {
-    DeleteContainerPointers(Mutations);
-    delete SchedImpl;
-  }
+  virtual ~ScheduleDAGMI();
 
   /// Add a postprocessing step to the DAG builder.
   /// Mutations are applied in the order that they are added after normal DAG
@@ -308,6 +316,18 @@ public:
 
   const SUnit *getNextClusterSucc() const { return NextClusterSucc; }
 
+  /// Initialize a DFSResult after DAG building is complete, and before any
+  /// queue comparisons.
+  void initDFSResult();
+
+  /// Compute DFS result once all interesting roots are discovered.
+  void computeDFSResult(ArrayRef<SUnit*> Roots);
+
+  /// Return a non-null DFS result if the scheduling strategy initialized it.
+  const SchedDFSResult *getDFSResult() const { return DFSResult; }
+
+  BitVector &getScheduledTrees() { return ScheduledTrees; }
+
 protected:
   // Top-Level entry points for the schedule() driver...
 
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index b9198e8fc6..3e5935cca7 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -56,6 +56,9 @@ static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
   cl::desc("Enable scheduling for macro fusion."), cl::init(true));
 
+// DAG subtrees must have at least this many nodes.
+static const unsigned MinSubtreeSize = 8;
+
 //===----------------------------------------------------------------------===//
 // Machine Instruction Scheduling Pass and Registry
 //===----------------------------------------------------------------------===//
@@ -301,6 +304,12 @@ void ReadyQueue::dump() {
 // preservation.
 //===----------------------------------------------------------------------===//
 
+ScheduleDAGMI::~ScheduleDAGMI() {
+  delete DFSResult;
+  DeleteContainerPointers(Mutations);
+  delete SchedImpl;
+}
+
 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
   if (SuccSU != &ExitSU) {
     // Do not use WillCreateCycle, it assumes SD scheduling.
@@ -504,8 +513,6 @@ void ScheduleDAGMI::schedule() {
   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
           SUnits[su].dumpAll(this));
 
-  if (ViewMISchedDAGs) viewGraph();
-
   initQueues();
 
   bool IsTopNode = false;
@@ -554,6 +561,19 @@ void ScheduleDAGMI::postprocessDAG() {
   }
 }
 
+void ScheduleDAGMI::initDFSResult() {
+  if (!DFSResult)
+    DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
+  DFSResult->clear();
+  DFSResult->resize(SUnits.size());
+  ScheduledTrees.clear();
+}
+
+void ScheduleDAGMI::computeDFSResult(ArrayRef<SUnit*> Roots) {
+  DFSResult->compute(Roots);
+  ScheduledTrees.resize(DFSResult->getNumSubtrees());
+}
+
 // Release all DAG roots for scheduling.
 //
 // Nodes with unreleased weak edges can still be roots.
@@ -655,6 +675,15 @@ void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
 
   SU->isScheduled = true;
 
+  if (DFSResult) {
+    unsigned SubtreeID = DFSResult->getSubtreeID(SU);
+    if (!ScheduledTrees.test(SubtreeID)) {
+      ScheduledTrees.set(SubtreeID);
+      DFSResult->scheduleTree(SubtreeID);
+      SchedImpl->scheduleTree(SubtreeID);
+    }
+  }
+
   // Notify the scheduling strategy after updating the DAG.
   SchedImpl->schedNode(SU, IsTopNode);
 }
@@ -1187,6 +1216,8 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
   Top.init(DAG, SchedModel, &Rem);
   Bot.init(DAG, SchedModel, &Rem);
 
+  DAG->initDFSResult();
+
   // Initialize resource counts.
 
   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
@@ -1247,6 +1278,8 @@ void ConvergingScheduler::registerRoots() {
       Rem.CriticalPath = (*I)->getDepth();
   }
   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
+
+  DAG->computeDFSResult(Bot.Available.elements());
 }
 
 /// Does this SU have a hazard within the current instruction group.
@@ -2056,12 +2089,11 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.",
 namespace {
 /// \brief Order nodes by the ILP metric.
 struct ILPOrder {
-  SchedDFSResult *DFSResult;
-  BitVector *ScheduledTrees;
+  const SchedDFSResult *DFSResult;
+  const BitVector *ScheduledTrees;
   bool MaximizeILP;
 
-  ILPOrder(SchedDFSResult *dfs, BitVector *schedtrees, bool MaxILP)
-    : DFSResult(dfs), ScheduledTrees(schedtrees), MaximizeILP(MaxILP) {}
+  ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
 
   /// \brief Apply a less-than relation on node priority.
   ///
@@ -2099,26 +2131,23 @@ class ILPScheduler : public MachineSchedStrategy {
   /// (a motivating test case must be found).
   static const unsigned SubtreeLimit = 16;
 
-  SchedDFSResult DFSResult;
-  BitVector ScheduledTrees;
+  ScheduleDAGMI *DAG;
   ILPOrder Cmp;
 
   std::vector<SUnit*> ReadyQ;
 public:
-  ILPScheduler(bool MaximizeILP)
-  : DFSResult(/*BottomUp=*/true, SubtreeLimit),
-    Cmp(&DFSResult, &ScheduledTrees, MaximizeILP) {}
+  ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
 
-  virtual void initialize(ScheduleDAGMI *DAG) {
+  virtual void initialize(ScheduleDAGMI *dag) {
+    DAG = dag;
+    DAG->initDFSResult();
+    Cmp.DFSResult = DAG->getDFSResult();
+    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
     ReadyQ.clear();
-    DFSResult.clear();
-    DFSResult.resize(DAG->SUnits.size());
-    ScheduledTrees.clear();
   }
 
   virtual void registerRoots() {
-    DFSResult.compute(ReadyQ);
-    ScheduledTrees.resize(DFSResult.getNumSubtrees());
+    DAG->computeDFSResult(ReadyQ);
     // Restore the heap in ReadyQ with the updated DFS results.
     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   }
@@ -2135,21 +2164,22 @@ public:
     IsTopNode = false;
     DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): "
           << *SU->getInstr()
-          << " ILP: " << DFSResult.getILP(SU)
-          << " Tree: " << DFSResult.getSubtreeID(SU) << " @"
-          << DFSResult.getSubtreeLevel(DFSResult.getSubtreeID(SU))<< '\n');
+          << " ILP: " << DAG->getDFSResult()->getILP(SU)
+          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
+          << DAG->getDFSResult()->getSubtreeLevel(
+            DAG->getDFSResult()->getSubtreeID(SU)) << '\n');
     return SU;
   }
 
+  /// \brief Scheduler callback to notify that a new subtree is scheduled.
+  virtual void scheduleTree(unsigned SubtreeID) {
+    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+  }
+
   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
   /// DFSResults, and resort the priority Q.
   virtual void schedNode(SUnit *SU, bool IsTopNode) {
     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
-    if (!ScheduledTrees.test(DFSResult.getSubtreeID(SU))) {
-      ScheduledTrees.set(DFSResult.getSubtreeID(SU));
-      DFSResult.scheduleTree(DFSResult.getSubtreeID(SU));
-      std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
-    }
   }
 
   virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
diff --git a/test/CodeGen/X86/misched-matrix.ll b/test/CodeGen/X86/misched-matrix.ll
index f5566e5e5d..d00d17392a 100644
--- a/test/CodeGen/X86/misched-matrix.ll
+++ b/test/CodeGen/X86/misched-matrix.ll
@@ -8,6 +8,9 @@
 ; RUN:          -misched=ilpmax -verify-machineinstrs \
 ; RUN:     | FileCheck %s -check-prefix=ILPMAX
 ;
+; Very temporary xfail during SchedDFSResult churn.
+; XFAIL: *
+;
 ; Verify that the MI scheduler minimizes register pressure for a
 ; uniform set of bottom-up subtrees (unrolled matrix multiply).
 ;
-- 
cgit v1.2.3-70-g09d2


From 4e1fb1894048455d49d62543b3f83672b27b0000 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Fri, 25 Jan 2013 06:33:57 +0000
Subject: MIsched: Improve the interface to SchedDFS analysis (subtrees).

Allow the strategy to select SchedDFS. Allow the results of SchedDFS
to affect initialization of the scheduler state.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173425 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/MachineScheduler.h        | 14 +++---
 include/llvm/CodeGen/ScheduleDFS.h             |  7 ++-
 lib/CodeGen/MachineScheduler.cpp               | 62 ++++++++++++++------------
 lib/CodeGen/ScheduleDAGInstrs.cpp              | 14 +++---
 lib/Target/Hexagon/HexagonMachineScheduler.cpp |  8 +++-
 test/CodeGen/X86/misched-matrix.ll             |  3 --
 6 files changed, 61 insertions(+), 47 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index cc697a50ab..aa78f5285a 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -316,12 +316,9 @@ public:
 
   const SUnit *getNextClusterSucc() const { return NextClusterSucc; }
 
-  /// Initialize a DFSResult after DAG building is complete, and before any
+  /// Compute a DFSResult after DAG building is complete, and before any
   /// queue comparisons.
-  void initDFSResult();
-
-  /// Compute DFS result once all interesting roots are discovered.
-  void computeDFSResult(ArrayRef<SUnit*> Roots);
+  void computeDFSResult();
 
   /// Return a non-null DFS result if the scheduling strategy initialized it.
   const SchedDFSResult *getDFSResult() const { return DFSResult; }
@@ -341,8 +338,8 @@ protected:
   /// instances of ScheduleDAGMI to perform custom DAG postprocessing.
   void postprocessDAG();
 
-  /// Identify DAG roots and setup scheduler queues.
-  void initQueues();
+  /// Release ExitSU predecessors and setup scheduler queues.
+  void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
 
   /// Move an instruction and update register pressure.
   void scheduleMI(SUnit *SU, bool IsTopNode);
@@ -365,7 +362,8 @@ protected:
   void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
   bool checkSchedLimit();
 
-  void releaseRoots();
+  void findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
+                             SmallVectorImpl<SUnit*> &BotRoots);
 
   void releaseSucc(SUnit *SU, SDep *SuccEdge);
   void releaseSuccessors(SUnit *SU);
diff --git a/include/llvm/CodeGen/ScheduleDFS.h b/include/llvm/CodeGen/ScheduleDFS.h
index 54b223269e..e07929290e 100644
--- a/include/llvm/CodeGen/ScheduleDFS.h
+++ b/include/llvm/CodeGen/ScheduleDFS.h
@@ -127,7 +127,7 @@ public:
   }
 
   /// \brief Compute various metrics for the DAG with given roots.
-  void compute(ArrayRef<SUnit *> Roots);
+  void compute(ArrayRef<SUnit> SUnits);
 
   /// \brief Get the ILP value for a DAG node.
   ///
@@ -140,7 +140,12 @@ public:
   unsigned getNumSubtrees() const { return SubtreeConnectLevels.size(); }
 
   /// \brief Get the ID of the subtree the given DAG node belongs to.
+  ///
+  /// For convenience, if DFSResults have not been computed yet, give everything
+  /// tree ID 0.
   unsigned getSubtreeID(const SUnit *SU) const {
+    if (empty())
+      return 0;
     assert(SU->NodeNum < DFSData.size() &&  "New Node");
     return DFSData[SU->NodeNum].SubtreeID;
   }
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 3e5935cca7..aa599158e9 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -510,10 +510,19 @@ void ScheduleDAGMI::schedule() {
 
   postprocessDAG();
 
+  SmallVector<SUnit*, 8> TopRoots, BotRoots;
+  findRootsAndBiasEdges(TopRoots, BotRoots);
+
+  // Initialize the strategy before modifying the DAG.
+  // This may initialize a DFSResult to be used for queue priority.
+  SchedImpl->initialize(this);
+
   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
           SUnits[su].dumpAll(this));
+  if (ViewMISchedDAGs) viewGraph();
 
-  initQueues();
+  // Initialize ready queues now that the DAG and priority data are finalized.
+  initQueues(TopRoots, BotRoots);
 
   bool IsTopNode = false;
   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
@@ -561,25 +570,18 @@ void ScheduleDAGMI::postprocessDAG() {
   }
 }
 
-void ScheduleDAGMI::initDFSResult() {
+void ScheduleDAGMI::computeDFSResult() {
   if (!DFSResult)
     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
   DFSResult->clear();
-  DFSResult->resize(SUnits.size());
   ScheduledTrees.clear();
-}
-
-void ScheduleDAGMI::computeDFSResult(ArrayRef<SUnit*> Roots) {
-  DFSResult->compute(Roots);
+  DFSResult->resize(SUnits.size());
+  DFSResult->compute(SUnits);
   ScheduledTrees.resize(DFSResult->getNumSubtrees());
 }
 
-// Release all DAG roots for scheduling.
-//
-// Nodes with unreleased weak edges can still be roots.
-void ScheduleDAGMI::releaseRoots() {
-  SmallVector<SUnit*, 16> BotRoots;
-
+void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
+                                          SmallVectorImpl<SUnit*> &BotRoots) {
   for (std::vector<SUnit>::iterator
          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
     SUnit *SU = &(*I);
@@ -589,28 +591,33 @@ void ScheduleDAGMI::releaseRoots() {
 
     // A SUnit is ready to top schedule if it has no predecessors.
     if (!I->NumPredsLeft && SU != &EntrySU)
-      SchedImpl->releaseTopNode(SU);
+      TopRoots.push_back(SU);
     // A SUnit is ready to bottom schedule if it has no successors.
     if (!I->NumSuccsLeft && SU != &ExitSU)
       BotRoots.push_back(SU);
   }
-  // Release bottom roots in reverse order so the higher priority nodes appear
-  // first. This is more natural and slightly more efficient.
-  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
-         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
-    SchedImpl->releaseBottomNode(*I);
 }
 
 /// Identify DAG roots and setup scheduler queues.
-void ScheduleDAGMI::initQueues() {
+void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
+                               ArrayRef<SUnit*> BotRoots) {
   NextClusterSucc = NULL;
   NextClusterPred = NULL;
 
-  // Initialize the strategy before modifying the DAG.
-  SchedImpl->initialize(this);
-
   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
-  releaseRoots();
+  //
+  // Nodes with unreleased weak edges can still be roots.
+  // Release top roots in forward order.
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
+    SchedImpl->releaseTopNode(*I);
+  }
+  // Release bottom roots in reverse order so the higher priority nodes appear
+  // first. This is more natural and slightly more efficient.
+  for (SmallVectorImpl<SUnit*>::const_reverse_iterator
+         I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
+    SchedImpl->releaseBottomNode(*I);
+  }
 
   releaseSuccessors(&EntrySU);
   releasePredecessors(&ExitSU);
@@ -1216,7 +1223,7 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
   Top.init(DAG, SchedModel, &Rem);
   Bot.init(DAG, SchedModel, &Rem);
 
-  DAG->initDFSResult();
+  DAG->computeDFSResult();
 
   // Initialize resource counts.
 
@@ -1278,8 +1285,6 @@ void ConvergingScheduler::registerRoots() {
       Rem.CriticalPath = (*I)->getDepth();
   }
   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
-
-  DAG->computeDFSResult(Bot.Available.elements());
 }
 
 /// Does this SU have a hazard within the current instruction group.
@@ -2140,14 +2145,13 @@ public:
 
   virtual void initialize(ScheduleDAGMI *dag) {
     DAG = dag;
-    DAG->initDFSResult();
+    DAG->computeDFSResult();
     Cmp.DFSResult = DAG->getDFSResult();
     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
     ReadyQ.clear();
   }
 
   virtual void registerRoots() {
-    DAG->computeDFSResult(ReadyQ);
     // Restore the heap in ReadyQ with the updated DFS results.
     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   }
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 428c1a4032..7ee5207570 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1188,16 +1188,20 @@ static bool hasDataSucc(const SUnit *SU) {
 
 /// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
 /// search from this root.
-void SchedDFSResult::compute(ArrayRef<SUnit *> Roots) {
+void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
   if (!IsBottomUp)
     llvm_unreachable("Top-down ILP metric is unimplemnted");
 
   SchedDFSImpl Impl(*this);
-  for (ArrayRef<const SUnit*>::const_iterator
-         RootI = Roots.begin(), RootE = Roots.end(); RootI != RootE; ++RootI) {
+  for (ArrayRef<SUnit>::const_iterator
+         SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
+    const SUnit *SU = &*SI;
+    if (Impl.isVisited(SU) || hasDataSucc(SU))
+      continue;
+
     SchedDAGReverseDFS DFS;
-    Impl.visitPreorder(*RootI);
-    DFS.follow(*RootI);
+    Impl.visitPreorder(SU);
+    DFS.follow(SU);
     for (;;) {
       // Traverse the leftmost path as far as possible.
       while (DFS.getPred() != DFS.getPredEnd()) {
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index aef6830752..36dfaa4233 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -152,6 +152,12 @@ void VLIWMachineScheduler::schedule() {
   // Postprocess the DAG to add platform specific artificial dependencies.
   postprocessDAG();
 
+  SmallVector<SUnit*, 8> TopRoots, BotRoots;
+  findRootsAndBiasEdges(TopRoots, BotRoots);
+
+  // Initialize the strategy before modifying the DAG.
+  SchedImpl->initialize(this);
+
   // To view Height/Depth correctly, they should be accessed at least once.
   DEBUG(unsigned maxH = 0;
         for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
@@ -166,7 +172,7 @@ void VLIWMachineScheduler::schedule() {
   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
           SUnits[su].dumpAll(this));
 
-  initQueues();
+  initQueues(TopRoots, BotRoots);
 
   bool IsTopNode = false;
   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
diff --git a/test/CodeGen/X86/misched-matrix.ll b/test/CodeGen/X86/misched-matrix.ll
index d00d17392a..f5566e5e5d 100644
--- a/test/CodeGen/X86/misched-matrix.ll
+++ b/test/CodeGen/X86/misched-matrix.ll
@@ -8,9 +8,6 @@
 ; RUN:          -misched=ilpmax -verify-machineinstrs \
 ; RUN:     | FileCheck %s -check-prefix=ILPMAX
 ;
-; Very temporary xfail during SchedDFSResult churn.
-; XFAIL: *
-;
 ; Verify that the MI scheduler minimizes register pressure for a
 ; uniform set of bottom-up subtrees (unrolled matrix multiply).
 ;
-- 
cgit v1.2.3-70-g09d2


From 3084979ff27f48487c7421536144c41a36cae997 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Fri, 25 Jan 2013 07:45:29 +0000
Subject: MachineScheduler support for viewGraph.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173432 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/MachineScheduler.h |  3 ++
 include/llvm/CodeGen/ScheduleDAG.h      |  4 +-
 lib/CodeGen/MachineScheduler.cpp        | 89 ++++++++++++++++++++++++++++++++-
 3 files changed, 93 insertions(+), 3 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index aa78f5285a..2e39161d11 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -325,6 +325,9 @@ public:
 
   BitVector &getScheduledTrees() { return ScheduledTrees; }
 
+  void viewGraph(const Twine &Name, const Twine &Title) LLVM_OVERRIDE;
+  void viewGraph() LLVM_OVERRIDE;
+
 protected:
   // Top-Level entry points for the schedule() driver...
 
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 18bef1c402..6792269810 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -578,8 +578,8 @@ namespace llvm {
     /// viewGraph - Pop up a GraphViz/gv window with the ScheduleDAG rendered
     /// using 'dot'.
     ///
-    void viewGraph(const Twine &Name, const Twine &Title);
-    void viewGraph();
+    virtual void viewGraph(const Twine &Name, const Twine &Title);
+    virtual void viewGraph();
 
     virtual void dumpNode(const SUnit *SU) const = 0;
 
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index aa599158e9..9072dd4b71 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GraphWriter.h"
 #include "llvm/Support/raw_ostream.h"
 #include <queue>
 
@@ -557,7 +558,6 @@ void ScheduleDAGMI::buildDAGWithRegPressure() {
 
   // Build the DAG, and compute current register pressure.
   buildSchedGraph(AA, &RPTracker);
-  if (ViewMISchedDAGs) viewGraph();
 
   // Initialize top/bottom trackers after computing region pressure.
   initRegPressure();
@@ -2294,3 +2294,90 @@ static MachineSchedRegistry ShufflerRegistry(
   "shuffle", "Shuffle machine instructions alternating directions",
   createInstructionShuffler);
 #endif // !NDEBUG
+
+//===----------------------------------------------------------------------===//
+// GraphWriter support for ScheduleDAGMI.
+//===----------------------------------------------------------------------===//
+
+#ifndef NDEBUG
+namespace llvm {
+
+template<> struct GraphTraits<
+  ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
+
+template<>
+struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
+
+  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+  static std::string getGraphName(const ScheduleDAG *G) {
+    return G->MF.getName();
+  }
+
+  static bool renderGraphFromBottomUp() {
+    return true;
+  }
+
+  static bool isNodeHidden(const SUnit *Node) {
+    return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+  }
+
+  static bool hasNodeAddressLabel(const SUnit *Node,
+                                  const ScheduleDAG *Graph) {
+    return false;
+  }
+
+  /// If you want to override the dot attributes printed for a particular
+  /// edge, override this method.
+  static std::string getEdgeAttributes(const SUnit *Node,
+                                       SUnitIterator EI,
+                                       const ScheduleDAG *Graph) {
+    if (EI.isArtificialDep())
+      return "color=cyan,style=dashed";
+    if (EI.isCtrlDep())
+      return "color=blue,style=dashed";
+    return "";
+  }
+
+  static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
+    std::string Str;
+    raw_string_ostream SS(Str);
+    SS << "SU(" << SU->NodeNum << ')';
+    return SS.str();
+  }
+  static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
+    return G->getGraphNodeLabel(SU);
+  }
+
+  static std::string getNodeAttributes(const SUnit *N,
+                                       const ScheduleDAG *Graph) {
+    std::string Str("shape=Mrecord");
+    const SchedDFSResult *DFS =
+      static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
+    if (DFS) {
+      Str += ",style=filled,fillcolor=\"#";
+      Str += DOT::getColorString(DFS->getSubtreeID(N));
+      Str += '"';
+    }
+    return Str;
+  }
+};
+} // namespace llvm
+#endif // NDEBUG
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
+#ifndef NDEBUG
+  ViewGraph(this, Name, false, Title);
+#else
+  errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
+         << "systems with Graphviz or gv!\n";
+#endif  // NDEBUG
+}
+
+/// Out-of-line implementation with no arguments is handy for gdb.
+void ScheduleDAGMI::viewGraph() {
+  viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
+}
-- 
cgit v1.2.3-70-g09d2


From c855423ff2a18e2168324ac5902cfe862cd4b54f Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Fri, 25 Jan 2013 07:45:31 +0000
Subject: MIsched: Print block name. No functionality.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173433 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 9072dd4b71..821a4f2027 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -263,7 +263,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
       }
       DEBUG(dbgs() << "********** MI Scheduling **********\n");
       DEBUG(dbgs() << MF->getName()
-            << ":BB#" << MBB->getNumber() << "\n  From: " << *I << "    To: ";
+            << ":BB#" << MBB->getNumber() << " " << MBB->getName()
+            << "\n  From: " << *I << "    To: ";
             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
             else dbgs() << "End";
             dbgs() << " Remaining: " << RemainingInstrs << "\n");
-- 
cgit v1.2.3-70-g09d2


From b74564a15ac6bfa85c9597bed842b686be1a1a62 Mon Sep 17 00:00:00 2001
From: Jakub Staszak <kubastaszak@gmail.com>
Date: Fri, 25 Jan 2013 21:44:27 +0000
Subject: Use const reference instead of vector copying.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173497 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 821a4f2027..adf9a57dde 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -467,7 +467,8 @@ void ScheduleDAGMI::initRegPressure() {
   // Cache the list of excess pressure sets in this region. This will also track
   // the max pressure in the scheduled code for these sets.
   RegionCriticalPSets.clear();
-  std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
+  const std::vector<unsigned> &RegionPressure =
+    RPTracker.getPressure().MaxSetPressure;
   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
     unsigned Limit = TRI->getRegPressureSetLimit(i);
     DEBUG(dbgs() << TRI->getRegPressureSetName(i)
-- 
cgit v1.2.3-70-g09d2


From 4c6a2ba4e01ef75837bb39808a3935bd8687ce67 Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Tue, 29 Jan 2013 06:26:35 +0000
Subject: MIsched: cleanup code. Use isBoundaryNode().

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@173775 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index adf9a57dde..513d8a9268 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -587,17 +587,19 @@ void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
   for (std::vector<SUnit>::iterator
          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
     SUnit *SU = &(*I);
+    assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
 
     // Order predecessors so DFSResult follows the critical path.
     SU->biasCriticalPath();
 
     // A SUnit is ready to top schedule if it has no predecessors.
-    if (!I->NumPredsLeft && SU != &EntrySU)
+    if (!I->NumPredsLeft)
       TopRoots.push_back(SU);
     // A SUnit is ready to bottom schedule if it has no successors.
-    if (!I->NumSuccsLeft && SU != &ExitSU)
+    if (!I->NumSuccsLeft)
       BotRoots.push_back(SU);
   }
+  ExitSU.biasCriticalPath();
 }
 
 /// Identify DAG roots and setup scheduler queues.
-- 
cgit v1.2.3-70-g09d2


From ecb8c2ba6029f02b01b20b110cc1b3b3ea2e1f1c Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Wed, 13 Feb 2013 19:22:27 +0000
Subject: MIsched: HazardRecognizers are created for each DAG. Free them.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175067 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp               | 7 ++++++-
 lib/Target/Hexagon/HexagonMachineScheduler.cpp | 4 +++-
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 513d8a9268..ddaf56627b 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1054,6 +1054,9 @@ public:
 #endif
 
     void reset() {
+      // A new HazardRec is created for each DAG and owned by SchedBoundary.
+      delete HazardRec;
+
       Available.clear();
       Pending.clear();
       CheckPending = false;
@@ -1079,7 +1082,8 @@ public:
     /// PendingFlag set.
     SchedBoundary(unsigned ID, const Twine &Name):
       DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
-      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") {
+      Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
+      HazardRec(0) {
       reset();
     }
 
@@ -1223,6 +1227,7 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
   DAG = dag;
   SchedModel = DAG->getSchedModel();
   TRI = DAG->TRI;
+
   Rem.init(DAG, SchedModel);
   Top.init(DAG, SchedModel, &Rem);
   Bot.init(DAG, SchedModel, &Rem);
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 36dfaa4233..ced17b3c0a 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -192,6 +192,7 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
   DAG = static_cast<VLIWMachineScheduler*>(dag);
   SchedModel = DAG->getSchedModel();
   TRI = DAG->TRI;
+
   Top.init(DAG, SchedModel);
   Bot.init(DAG, SchedModel);
 
@@ -199,6 +200,8 @@ void ConvergingVLIWScheduler::initialize(ScheduleDAGMI *dag) {
   // are disabled, then these HazardRecs will be disabled.
   const InstrItineraryData *Itin = DAG->getSchedModel()->getInstrItineraries();
   const TargetMachine &TM = DAG->MF.getTarget();
+  delete Top.HazardRec;
+  delete Bot.HazardRec;
   Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
   Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
 
@@ -683,4 +686,3 @@ void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
     Bot.bumpNode(SU);
   }
 }
-
-- 
cgit v1.2.3-70-g09d2


From b717a5084722a5cad843444a8b1b4bf53f1c6325 Mon Sep 17 00:00:00 2001
From: Jakub Staszak <kubastaszak@gmail.com>
Date: Sat, 16 Feb 2013 15:47:26 +0000
Subject: Use const reference instead of vector object when passing an argument
 to updateScheduledPressure method.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175362 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/MachineScheduler.h | 2 +-
 lib/CodeGen/MachineScheduler.cpp        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 2e39161d11..57febe7746 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -360,7 +360,7 @@ protected:
 
   void initRegPressure();
 
-  void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
+  void updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure);
 
   void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
   bool checkSchedLimit();
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index ddaf56627b..589fa1fa02 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -487,7 +487,7 @@ void ScheduleDAGMI::initRegPressure() {
 // FIXME: When the pressure tracker deals in pressure differences then we won't
 // iterate over all RegionCriticalPSets[i].
 void ScheduleDAGMI::
-updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
+updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
   for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
     unsigned ID = RegionCriticalPSets[i].PSetID;
     int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
-- 
cgit v1.2.3-70-g09d2


From fff2d3aed9d50780d9d07859b03120dcd712d96e Mon Sep 17 00:00:00 2001
From: Andrew Trick <atrick@apple.com>
Date: Fri, 8 Mar 2013 05:40:34 +0000
Subject: Add -verify-misched option.

This verifies live intervals both before and after scheduling. It's
useful for anyone hacking on live interval update.

Note that we don't yet pass verification all the time. We don't yet
handle updating nonallocatable live intervals perfectly.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176685 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/MachineScheduler.cpp | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 589fa1fa02..7f4e36f236 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -57,6 +57,9 @@ static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
   cl::desc("Enable scheduling for macro fusion."), cl::init(true));
 
+static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
+  cl::desc("Verify machine instrs before and after machine scheduling"));
+
 // DAG subtrees must have at least this many nodes.
 static const unsigned MinSubtreeSize = 8;
 
@@ -197,6 +200,10 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
   LIS = &getAnalysis<LiveIntervals>();
   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
 
+  if (VerifyScheduling) {
+    DEBUG(LIS->print(dbgs()));
+    MF->verify(this, "Before machine scheduling.");
+  }
   RegClassInfo->runOnMachineFunction(*MF);
 
   // Select the scheduler, or set the default.
@@ -285,6 +292,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
   }
   Scheduler->finalizeSchedule();
   DEBUG(LIS->print(dbgs()));
+  if (VerifyScheduling)
+    MF->verify(this, "After machine scheduling.");
   return true;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 760fa5dc8022dcf6982969c26ef566dfbeea979c Mon Sep 17 00:00:00 2001
From: Jakub Staszak <kubastaszak@gmail.com>
Date: Sun, 10 Mar 2013 13:11:23 +0000
Subject: Cleanup #includes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176787 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/ScheduleDAGInstrs.h       | 5 +----
 lib/CodeGen/MachineScheduler.cpp               | 2 ++
 lib/Target/Hexagon/HexagonMachineScheduler.cpp | 3 ++-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'lib/CodeGen/MachineScheduler.cpp')

diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 94abec2002..2219520ca1 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -15,18 +15,15 @@
 #ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
 #define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
 
-#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SparseSet.h"
 #include "llvm/ADT/SparseMultiSet.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Target/TargetRegisterInfo.h"
-#include <map>
 
 namespace llvm {
+  class MachineFrameInfo;
   class MachineLoopInfo;
   class MachineDominatorTree;
   class LiveIntervals;
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 7f4e36f236..103b058c13 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -19,6 +19,8 @@
 #include "llvm/ADT/PriorityQueue.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/RegisterClassInfo.h"
 #include "llvm/CodeGen/ScheduleDFS.h"
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 9d3c8b8c91..1388ad4f16 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -15,7 +15,8 @@
 #define DEBUG_TYPE "misched"
 
 #include "HexagonMachineScheduler.h"
-#include <queue>
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/IR/Function.h"
 
 using namespace llvm;
 
-- 
cgit v1.2.3-70-g09d2