diff options
Diffstat (limited to 'lib')
24 files changed, 612 insertions, 168 deletions
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index f0d830b11e..0eb009ddac 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "scheduler" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { @@ -136,3 +137,16 @@ void LatencyPriorityQueue::remove(SUnit *SU) { std::swap(*I, Queue.back()); Queue.pop_back(); } + +#ifdef NDEBUG +void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {} +#else +void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const { + LatencyPriorityQueue q = *this; + while (!q.empty()) { + SUnit *su = q.pop(); + dbgs() << "Height " << su->getHeight() << ": "; + su->dump(DAG); + } +} +#endif diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index bd5b2b898b..60c24b7107 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -133,18 +133,12 @@ namespace { std::vector<unsigned> KillIndices; public: - SchedulePostRATDList(MachineFunction &MF, - const MachineLoopInfo &MLI, - const MachineDominatorTree &MDT, - ScheduleHazardRecognizer *HR, - AntiDepBreaker *ADB, - AliasAnalysis *aa) - : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), - HazardRec(HR), AntiDepBreak(ADB), AA(aa), - KillIndices(TRI->getNumRegs()) {} - - ~SchedulePostRATDList() { - } + SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs); + + ~SchedulePostRATDList(); /// StartBlock - Initialize register live-range state for scheduling in /// this block. @@ -183,9 +177,34 @@ namespace { }; } +SchedulePostRATDList::SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs) + : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA), + KillIndices(TRI->getNumRegs()) +{ + const TargetMachine &TM = MF.getTarget(); + const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); + HazardRec = + TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); + AntiDepBreak = + ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? + (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) : + ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? + (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL)); +} + +SchedulePostRATDList::~SchedulePostRATDList() { + delete HazardRec; + delete AntiDepBreak; +} + bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { - AA = &getAnalysis<AliasAnalysis>(); TII = Fn.getTarget().getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); // Check for explicit enable/disable of post-ra scheduling. TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; @@ -195,6 +214,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { return false; } else { // Check that post-RA scheduling is enabled for this target. + // This may upgrade the AntiDepMode. const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>(); if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) return false; @@ -210,19 +230,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "PostRAScheduler\n"); - const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); - const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); - const TargetMachine &TM = Fn.getTarget(); - const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); - ScheduleHazardRecognizer *HR = - TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins); - AntiDepBreaker *ADB = - ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? - (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : - ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? - (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL)); - - SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA); + SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode, + CriticalPathRCs); // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); @@ -270,9 +279,6 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.FixupKills(MBB); } - delete HR; - delete ADB; - return true; } @@ -617,13 +623,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { MinDepth = PendingQueue[i]->getDepth(); } - DEBUG(dbgs() << "\n*** Examining Available\n"; - LatencyPriorityQueue q = AvailableQueue; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(this); - }); + DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); SUnit *FoundSUnit = 0; bool HasNoopHazards = false; @@ -631,7 +631,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { SUnit *CurSUnit = AvailableQueue.pop(); ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(CurSUnit); + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { FoundSUnit = CurSUnit; break; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index d2fbc0e28d..02e398f7ef 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -33,6 +34,12 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) ScheduleDAG::~ScheduleDAG() {} +/// getInstrDesc helper to handle SDNodes. +const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { + if (!Node->isMachineOpcode()) return NULL; + return &TII->get(Node->getMachineOpcode()); +} + /// dump - dump the schedule. void ScheduleDAG::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index d78b5d3ecb..b00e0cd099 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sched-hazard" +#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" @@ -23,29 +23,48 @@ using namespace llvm; +#ifndef NDEBUG +const char *ScoreboardHazardRecognizer::DebugType = ""; +#endif + ScoreboardHazardRecognizer:: -ScoreboardHazardRecognizer(const InstrItineraryData *LItinData) : - ScheduleHazardRecognizer(), ItinData(LItinData) { +ScoreboardHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *SchedDAG, + const char *ParentDebugType) : + ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0), + IssueCount(0) { + +#ifndef NDEBUG + DebugType = ParentDebugType; +#endif + // Determine the maximum depth of any itinerary. This determines the // depth of the scoreboard. We always make the scoreboard at least 1 // cycle deep to avoid dealing with the boundary condition. unsigned ScoreboardDepth = 1; if (ItinData && !ItinData->isEmpty()) { + IssueWidth = ItinData->IssueWidth; + for (unsigned idx = 0; ; ++idx) { if (ItinData->isEndMarker(idx)) break; const InstrStage *IS = ItinData->beginStage(idx); const InstrStage *E = ItinData->endStage(idx); + unsigned CurCycle = 0; unsigned ItinDepth = 0; - for (; IS != E; ++IS) - ItinDepth += IS->getCycles(); + for (; IS != E; ++IS) { + unsigned StageDepth = CurCycle + IS->getCycles(); + if (ItinDepth < StageDepth) ItinDepth = StageDepth; + CurCycle += IS->getNextCycles(); + } // Find the next power-of-2 >= ItinDepth while (ItinDepth > ScoreboardDepth) { ScoreboardDepth *= 2; } } + MaxLookAhead = ScoreboardDepth; } ReservedScoreboard.reset(ScoreboardDepth); @@ -56,6 +75,7 @@ ScoreboardHazardRecognizer(const InstrItineraryData *LItinData) : } void ScoreboardHazardRecognizer::Reset() { + IssueCount = 0; RequiredScoreboard.reset(); ReservedScoreboard.reset(); } @@ -76,24 +96,46 @@ void ScoreboardHazardRecognizer::Scoreboard::dump() const { } } +bool ScoreboardHazardRecognizer::atIssueLimit() const { + if (IssueWidth == 0) + return false; + + return IssueCount == IssueWidth; +} + ScheduleHazardRecognizer::HazardType -ScoreboardHazardRecognizer::getHazardType(SUnit *SU) { +ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (!ItinData || ItinData->isEmpty()) return NoHazard; - unsigned cycle = 0; + // Note that stalls will be negative for bottom-up scheduling. + int cycle = Stalls; // Use the itinerary for the underlying instruction to check for // free FU's in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + + const TargetInstrDesc *TID = DAG->getInstrDesc(SU); + if (TID == NULL) { + // Don't check hazards for non-machineinstr Nodes. + return NoHazard; + } + unsigned idx = TID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { // We must find one of the stage's units free for every cycle the // stage is occupied. FIXME it would be more accurate to find the // same unit free in all the cycles. for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < RequiredScoreboard.getDepth()) && - "Scoreboard depth exceeded!"); + int StageCycle = cycle + (int)i; + if (StageCycle < 0) + continue; + + if (StageCycle >= (int)RequiredScoreboard.getDepth()) { + assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() && + "Scoreboard depth exceeded!"); + // This stage was stalled beyond pipeline depth, so cannot conflict. + break; + } unsigned freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { @@ -101,18 +143,18 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU) { assert(0 && "Invalid FU reservation"); case InstrStage::Required: // Required FUs conflict with both reserved and required ones - freeUnits &= ~ReservedScoreboard[cycle + i]; + freeUnits &= ~ReservedScoreboard[StageCycle]; // FALLTHROUGH case InstrStage::Reserved: // Reserved FUs can conflict only with required ones. - freeUnits &= ~RequiredScoreboard[cycle + i]; + freeUnits &= ~RequiredScoreboard[StageCycle]; break; } if (!freeUnits) { DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); - DEBUG(SU->getInstr()->dump()); + DEBUG(DAG->dumpNode(SU)); return Hazard; } } @@ -128,11 +170,15 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { if (!ItinData || ItinData->isEmpty()) return; + ++IssueCount; + unsigned cycle = 0; // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + const TargetInstrDesc *TID = DAG->getInstrDesc(SU); + assert(TID && "The scheduler must filter non-machineinstrs"); + unsigned idx = TID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { // We must reserve one of the stage's units for every cycle the @@ -179,11 +225,13 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { } void ScoreboardHazardRecognizer::AdvanceCycle() { + IssueCount = 0; ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); } void ScoreboardHazardRecognizer::RecedeCycle() { + IssueCount = 0; ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0; ReservedScoreboard.recede(); RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index 11df0963c3..430283d5ef 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -63,11 +63,12 @@ private: public: ScheduleDAGList(MachineFunction &mf, - SchedulingPriorityQueue *availqueue, - ScheduleHazardRecognizer *HR) - : ScheduleDAGSDNodes(mf), - AvailableQueue(availqueue), HazardRec(HR) { - } + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { + + const TargetMachine &tm = mf.getTarget(); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } ~ScheduleDAGList() { delete HazardRec; @@ -202,7 +203,7 @@ void ScheduleDAGList::ListScheduleTopDown() { SUnit *CurSUnit = AvailableQueue->pop(); ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(CurSUnit); + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { FoundSUnit = CurSUnit; break; @@ -257,12 +258,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // Public Constructor Functions //===----------------------------------------------------------------------===// -/// createTDListDAGScheduler - This creates a top-down list scheduler with a -/// new hazard recognizer. This scheduler takes ownership of the hazard -/// recognizer and deletes it when done. +/// createTDListDAGScheduler - This creates a top-down list scheduler. ScheduleDAGSDNodes * llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { - return new ScheduleDAGList(*IS->MF, - new LatencyPriorityQueue(), - IS->CreateTargetHazardRecognizer()); + return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue()); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8fc6eacbf6..c93ef4dd46 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -20,6 +20,7 @@ #include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -65,6 +66,11 @@ static RegisterScheduler "which tries to balance ILP and register pressure", createILPListDAGScheduler); +static cl::opt<bool> EnableSchedCycles( + "enable-sched-cycles", + cl::desc("Enable cycle-level precision during preRA scheduling"), + cl::init(false), cl::Hidden); + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -83,9 +89,21 @@ private: /// AvailableQueue - The priority queue to use for the available SUnits. SchedulingPriorityQueue *AvailableQueue; + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands becomes available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + /// CurCycle - The current scheduler state corresponds to this cycle. unsigned CurCycle; + /// MinAvailableCycle - Cycle of the soonest available instruction. + unsigned MinAvailableCycle; + /// LiveRegDefs - A set of physical registers and their definition /// that are "live". These nodes must be scheduled before any other nodes that /// modifies the registers can be scheduled. @@ -98,14 +116,22 @@ private: ScheduleDAGTopologicalSort Topo; public: - ScheduleDAGRRList(MachineFunction &mf, - bool isbottomup, bool needlatency, - SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency), - AvailableQueue(availqueue), CurCycle(0), Topo(SUnits) { - } + ScheduleDAGRRList(MachineFunction &mf, bool needlatency, + SchedulingPriorityQueue *availqueue, + CodeGenOpt::Level OptLevel) + : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), + NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), + Topo(SUnits) { + + const TargetMachine &tm = mf.getTarget(); + if (EnableSchedCycles && OptLevel != CodeGenOpt::None) + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + else + HazardRec = new ScheduleHazardRecognizer(); + } ~ScheduleDAGRRList() { + delete HazardRec; delete AvailableQueue; } @@ -139,20 +165,31 @@ public: } private: + bool isReady(SUnit *SU) { + return !EnableSchedCycles || !AvailableQueue->hasReadyFilter() || + AvailableQueue->isReady(SU); + } + void ReleasePred(SUnit *SU, const SDep *PredEdge); void ReleasePredecessors(SUnit *SU); void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); void ReleaseSuccessors(SUnit *SU); - void CapturePred(SDep *PredEdge); + void ReleasePending(); + void AdvanceToCycle(unsigned NextCycle); + void AdvancePastStalls(SUnit *SU); + void EmitNode(SUnit *SU); void ScheduleNodeBottomUp(SUnit*); + void CapturePred(SDep *PredEdge); void UnscheduleNodeBottomUp(SUnit*); - void BacktrackBottomUp(SUnit*, unsigned); + void RestoreHazardCheckerBottomUp(); + void BacktrackBottomUp(SUnit*, SUnit*); SUnit *CopyAndMoveSuccessors(SUnit*); void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, SmallVector<SUnit*, 2>&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); @@ -198,6 +235,7 @@ void ScheduleDAGRRList::Schedule() { << " '" << BB->getName() << "' **********\n"); CurCycle = 0; + MinAvailableCycle = EnableSchedCycles ? UINT_MAX : 0; NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegGens.resize(TRI->getNumRegs(), NULL); @@ -211,6 +249,8 @@ void ScheduleDAGRRList::Schedule() { AvailableQueue->initNodes(SUnits); + HazardRec->Reset(); + // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. if (isBottomUp) ListScheduleBottomUp(); @@ -249,7 +289,20 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { // to be scheduled. Ignore the special EntrySU node. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { PredSU->isAvailable = true; - AvailableQueue->push(PredSU); + + unsigned Height = PredSU->getHeight(); + if (Height < MinAvailableCycle) + MinAvailableCycle = Height; + + if (isReady(SU)) { + AvailableQueue->push(PredSU); + } + // CapturePred and others may have left the node in the pending queue, avoid + // adding it twice. + else if (!PredSU->isPending) { + PredSU->isPending = true; + PendingQueue.push_back(PredSU); + } } } @@ -292,6 +345,127 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { } } +/// Check to see if any of the pending instructions are ready to issue. If +/// so, add them to the available queue. +void ScheduleDAGRRList::ReleasePending() { + assert(!EnableSchedCycles && "requires --enable-sched-cycles" ); + + // If the available queue is empty, it is safe to reset MinAvailableCycle. + if (AvailableQueue->empty()) + MinAvailableCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + unsigned ReadyCycle = + isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); + if (ReadyCycle < MinAvailableCycle) + MinAvailableCycle = ReadyCycle; + + if (PendingQueue[i]->isAvailable) { + if (!isReady(PendingQueue[i])) + continue; + AvailableQueue->push(PendingQueue[i]); + } + PendingQueue[i]->isPending = false; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } +} + +/// Move the scheduler state forward by the specified number of Cycles. +void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { + if (NextCycle <= CurCycle) + return; + + AvailableQueue->setCurCycle(NextCycle); + if (HazardRec->getMaxLookAhead() == 0) { + // Bypass lots of virtual calls in case of long latency. + CurCycle = NextCycle; + } + else { + for (; CurCycle != NextCycle; ++CurCycle) { + if (isBottomUp) + HazardRec->RecedeCycle(); + else + HazardRec->AdvanceCycle(); + } + } + // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the + // available Q to release pending nodes at least once before popping. + ReleasePending(); +} + +/// Move the scheduler state forward until the specified node's dependents are +/// ready and can be scheduled with no resource conflicts. +void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { + if (!EnableSchedCycles) + return; + + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + + // Bump CurCycle to account for latency. We assume the latency of other + // available instructions may be hidden by the stall (not a full pipe stall). + // This updates the hazard recognizer's cycle before reserving resources for + // this instruction. + AdvanceToCycle(ReadyCycle); + + // Calls are scheduled in their preceding cycle, so don't conflict with + // hazards from instructions after the call. EmitNode will reset the + // scoreboard state before emitting the call. + if (isBottomUp && SU->isCall) + return; + + // FIXME: For resource conflicts in very long non-pipelined stages, we + // should probably skip ahead here to avoid useless scoreboard checks. + int Stalls = 0; + while (true) { + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); + + if (HT == ScheduleHazardRecognizer::NoHazard) + break; + + ++Stalls; + } + AdvanceToCycle(CurCycle + Stalls); +} + +/// Record this SUnit in the HazardRecognizer. +/// Does not update CurCycle. +void ScheduleDAGRRList::EmitNode(SUnit *SU) { + switch (SU->getNode()->getOpcode()) { + default: + assert(SU->getNode()->isMachineOpcode() && + "This target-independent node should not be scheduled."); + break; + case ISD::MERGE_VALUES: + case ISD::TokenFactor: + case ISD::CopyToReg: + case ISD::CopyFromReg: + case ISD::EH_LABEL: + // Noops don't affect the scoreboard state. Copies are likely to be + // removed. + return; + case ISD::INLINEASM: + // For inline asm, clear the pipeline state. + HazardRec->Reset(); + return; + } + if (isBottomUp && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + + HazardRec->EmitInstruction(SU); + + if (!isBottomUp && SU->isCall) { + HazardRec->Reset(); + } +} + /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. @@ -304,8 +478,15 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); #endif - // FIXME: Handle noop hazard. + // FIXME: Do not modify node height. It may interfere with + // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the + // node it's ready cycle can aid heuristics, and after scheduling it can + // indicate the scheduled cycle. SU->setHeightToAtLeast(CurCycle); + + // Reserve resources for the scheduled intruction. + EmitNode(SU); + Sequence.push_back(SU); AvailableQueue->ScheduledNode(SU); @@ -327,6 +508,15 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { } SU->isScheduled = true; + + // Conditions under which the scheduler should eagerly advance the cycle: + // (1) No available instructions + // (2) All pipelines full, so available instructions must have hazards. + // + // If SchedCycles is disabled, count each inst as one cycle. + if (!EnableSchedCycles || + AvailableQueue->empty() || HazardRec->atIssueLimit()) + AdvanceToCycle(CurCycle + 1); } /// CapturePred - This does the opposite of ReleasePred. Since SU is being @@ -377,31 +567,69 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { LiveRegGens[I->getReg()] = I->getSUnit(); } } + if (SU->getHeight() < MinAvailableCycle) + MinAvailableCycle = SU->getHeight(); SU->setHeightDirty(); SU->isScheduled = false; SU->isAvailable = true; - AvailableQueue->push(SU); + if (EnableSchedCycles && AvailableQueue->hasReadyFilter()) { + // Don't make available until backtracking is complete. + SU->isPending = true; + PendingQueue.push_back(SU); + } + else { + AvailableQueue->push(SU); + } AvailableQueue->UnscheduledNode(SU); } +/// After backtracking, the hazard checker needs to be restored to a state +/// corresponding the the current cycle. +void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { + HazardRec->Reset(); + + unsigned LookAhead = std::min((unsigned)Sequence.size(), + HazardRec->getMaxLookAhead()); + if (LookAhead == 0) + return; + + std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead); + unsigned HazardCycle = (*I)->getHeight(); + for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) { + SUnit *SU = *I; + for (; SU->getHeight() > HazardCycle; ++HazardCycle) { + HazardRec->RecedeCycle(); + } + EmitNode(SU); + } +} + /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in /// BTCycle in order to schedule a specific node. -void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle) { - SUnit *OldSU = NULL; - while (CurCycle > BtCycle) { - OldSU = Sequence.back(); +void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { + SUnit *OldSU = Sequence.back(); + while (true) { Sequence.pop_back(); if (SU->isSucc(OldSU)) // Don't try to remove SU from AvailableQueue. SU->isAvailable = false; + // FIXME: use ready cycle instead of height + CurCycle = OldSU->getHeight(); UnscheduleNodeBottomUp(OldSU); - --CurCycle; AvailableQueue->setCurCycle(CurCycle); + if (OldSU == BtSU) + break; + OldSU = Sequence.back(); } assert(!SU->isSucc(OldSU) && "Something is wrong!"); + RestoreHazardCheckerBottomUp(); + + if (EnableSchedCycles) + ReleasePending(); + ++NumBacktracks; } @@ -741,7 +969,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { /// Return a node that can be scheduled in this cycle. Requirements: /// (1) Ready: latency has been satisfied -/// (2) No Hazards: resources are available (TBD) +/// (2) No Hazards: resources are available /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { SmallVector<SUnit*, 4> Interferences; @@ -777,24 +1005,26 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // Try unscheduling up to the point where it's safe to schedule // this node. - unsigned LiveCycle = CurCycle; + SUnit *BtSU = NULL; + unsigned LiveCycle = UINT_MAX; for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { |