diff options
Diffstat (limited to 'lib/CodeGen/MachineScheduler.cpp')
-rw-r--r-- | lib/CodeGen/MachineScheduler.cpp | 101 |
1 files changed, 47 insertions, 54 deletions
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index c7afa08fcd..c949266b8b 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -48,15 +48,6 @@ static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// Threshold to very roughly model an out-of-order processor's instruction -// buffers. If the actual value of this threshold matters much in practice, then -// it can be specified by the machine model. For now, it's an experimental -// tuning knob to determine when and if it matters. -static cl::opt<unsigned> ILPWindow("ilp-window", cl::Hidden, - cl::desc("Allow expected latency to exceed the critical path by N cycles " - "before attempting to balance ILP"), - cl::init(10U)); - // Experimental heuristics static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -953,23 +944,26 @@ public: unsigned CritResIdx; // Number of micro-ops left to schedule. unsigned RemainingMicroOps; - // Is the unscheduled zone resource limited. - bool IsResourceLimited; - - unsigned MaxRemainingCount; void reset() { CriticalPath = 0; RemainingCounts.clear(); CritResIdx = 0; RemainingMicroOps = 0; - IsResourceLimited = false; - MaxRemainingCount = 0; } SchedRemainder() { reset(); } void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); + + unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const { + if (!SchedModel->hasInstrSchedModel()) + return 0; + + return std::max( + RemainingMicroOps * SchedModel->getMicroOpFactor(), + RemainingCounts[CritResIdx]); + } }; /// Each Scheduling boundary is associated with ready queues. It tracks the @@ -1010,9 +1004,6 @@ public: unsigned ExpectedCount; - // Policy flag: attempt to find ILP until expected latency is covered. - bool ShouldIncreaseILP; - #ifndef NDEBUG // Remember the greatest min operand latency. unsigned MaxMinLatency; @@ -1033,7 +1024,6 @@ public: CritResIdx = 0; IsResourceLimited = false; ExpectedCount = 0; - ShouldIncreaseILP = false; #ifndef NDEBUG MaxMinLatency = 0; #endif @@ -1061,7 +1051,7 @@ public: unsigned getUnscheduledLatency(SUnit *SU) const { if (isTop()) return SU->getHeight(); - return SU->getDepth(); + return SU->getDepth() + SU->Latency; } unsigned getCriticalCount() const { @@ -1070,7 +1060,7 @@ public: bool checkHazard(SUnit *SU); - void checkILPPolicy(); + void setLatencyPolicy(CandPolicy &Policy); void releaseNode(SUnit *SU, unsigned ReadyCycle); @@ -1166,6 +1156,13 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { RemainingCounts[PIdx] += (Factor * PI->Cycles); } } + for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds(); + PIdx != PEnd; ++PIdx) { + if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx]) + >= (int)SchedModel->getLatencyFactor()) { + CritResIdx = PIdx; + } + } } void ConvergingScheduler::SchedBoundary:: @@ -1203,7 +1200,7 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) { if (SU->isScheduled) return; - for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; unsigned MinLatency = I->getMinLatency(); @@ -1274,12 +1271,28 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { return false; } -/// If expected latency is covered, disable ILP policy. -void ConvergingScheduler::SchedBoundary::checkILPPolicy() { - if (ShouldIncreaseILP - && (IsResourceLimited || ExpectedLatency <= CurrCycle)) { - ShouldIncreaseILP = false; - DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n'); +/// Compute the remaining latency to determine whether ILP should be increased. +void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { + // FIXME: compile time. In all, we visit four queues here one we should only + // need to visit the one that was last popped if we cache the result. + unsigned RemLatency = 0; + for (ReadyQueue::iterator I = Available.begin(), E = Available.end(); + I != E; ++I) { + unsigned L = getUnscheduledLatency(*I); + if (L > RemLatency) + RemLatency = L; + } + for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end(); + I != E; ++I) { + unsigned L = getUnscheduledLatency(*I); + if (L > RemLatency) + RemLatency = L; + } + unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); + if (RemLatency + ExpectedLatency >= CriticalPathLimit + && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { + Policy.ReduceLatency = true; + DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n'); } } @@ -1298,15 +1311,6 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, // Record this node as an immediate dependent of the scheduled node. NextSUs.insert(SU); - - // If CriticalPath has been computed, then check if the unscheduled nodes - // exceed the ILP window. Before registerRoots, CriticalPath==0. - if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU) - > Rem->CriticalPath + ILPWindow)) { - ShouldIncreaseILP = true; - DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " " - << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n'); - } } /// Move the boundary of scheduled code by one cycle. @@ -1354,9 +1358,6 @@ void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); Rem->RemainingCounts[PIdx] -= Count; - // Reset MaxRemainingCount for sanity. - Rem->MaxRemainingCount = 0; - // Check if this resource exceeds the current critical resource by a full // cycle. If so, it becomes the critical resource. if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) @@ -1488,9 +1489,7 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { /// resources. /// /// If the CriticalZone is latency limited, don't force a policy for the -/// candidates here. Instead, When releasing each candidate, releaseNode -/// compares the region's critical path to the candidate's height or depth and -/// the scheduled zone's expected latency then sets ShouldIncreaseILP. +/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed. void ConvergingScheduler::balanceZones( ConvergingScheduler::SchedBoundary &CriticalZone, ConvergingScheduler::SchedCandidate &CriticalCand, @@ -1499,6 +1498,7 @@ void ConvergingScheduler::balanceZones( if (!CriticalZone.IsResourceLimited) return; + assert(SchedModel->hasInstrSchedModel() && "required schedmodel"); SchedRemainder *Rem = CriticalZone.Rem; @@ -1506,7 +1506,7 @@ void ConvergingScheduler::balanceZones( // remainder, try to reduce it. unsigned RemainingCritCount = Rem->RemainingCounts[CriticalZone.CritResIdx]; - if ((int)(Rem->MaxRemainingCount - RemainingCritCount) + if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount) > (int)SchedModel->getLatencyFactor()) { CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce " @@ -1532,12 +1532,9 @@ void ConvergingScheduler::checkResourceLimits( ConvergingScheduler::SchedCandidate &TopCand, ConvergingScheduler::SchedCandidate &BotCand) { - Bot.checkILPPolicy(); - Top.checkILPPolicy(); - if (Bot.ShouldIncreaseILP) - BotCand.Policy.ReduceLatency = true; - if (Top.ShouldIncreaseILP) - TopCand.Policy.ReduceLatency = true; + // Set ReduceLatency to true if needed. + Bot.setLatencyPolicy(BotCand.Policy); + Top.setLatencyPolicy(TopCand.Policy); // Handle resource-limited regions. if (Top.IsResourceLimited && Bot.IsResourceLimited @@ -1572,9 +1569,6 @@ void ConvergingScheduler::checkResourceLimits( // The critical resource is different in each zone, so request balancing. // Compute the cost of each zone. - Rem.MaxRemainingCount = std::max( - Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(), - Rem.RemainingCounts[Rem.CritResIdx]); Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle); Top.ExpectedCount = std::max( Top.getCriticalCount(), @@ -1896,7 +1890,6 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, Cand.setBest(TryCand); DEBUG(traceCandidate(Cand, Zone)); } - TryCand.SU = *I; } } |