diff options
Diffstat (limited to 'lib')
24 files changed, 612 insertions, 168 deletions
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index f0d830b11e..0eb009ddac 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "scheduler" #include "llvm/CodeGen/LatencyPriorityQueue.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { @@ -136,3 +137,16 @@ void LatencyPriorityQueue::remove(SUnit *SU) { std::swap(*I, Queue.back()); Queue.pop_back(); } + +#ifdef NDEBUG +void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {} +#else +void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const { + LatencyPriorityQueue q = *this; + while (!q.empty()) { + SUnit *su = q.pop(); + dbgs() << "Height " << su->getHeight() << ": "; + su->dump(DAG); + } +} +#endif diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index bd5b2b898b..60c24b7107 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -133,18 +133,12 @@ namespace { std::vector<unsigned> KillIndices; public: - SchedulePostRATDList(MachineFunction &MF, - const MachineLoopInfo &MLI, - const MachineDominatorTree &MDT, - ScheduleHazardRecognizer *HR, - AntiDepBreaker *ADB, - AliasAnalysis *aa) - : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), - HazardRec(HR), AntiDepBreak(ADB), AA(aa), - KillIndices(TRI->getNumRegs()) {} - - ~SchedulePostRATDList() { - } + SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs); + + ~SchedulePostRATDList(); /// StartBlock - Initialize register live-range state for scheduling in /// this block. @@ -183,9 +177,34 @@ namespace { }; } +SchedulePostRATDList::SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, + AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs) + : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA), + KillIndices(TRI->getNumRegs()) +{ + const TargetMachine &TM = MF.getTarget(); + const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); + HazardRec = + TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this); + AntiDepBreak = + ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? + (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) : + ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? + (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL)); +} + +SchedulePostRATDList::~SchedulePostRATDList() { + delete HazardRec; + delete AntiDepBreak; +} + bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { - AA = &getAnalysis<AliasAnalysis>(); TII = Fn.getTarget().getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + AliasAnalysis *AA = &getAnalysis<AliasAnalysis>(); // Check for explicit enable/disable of post-ra scheduling. TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; @@ -195,6 +214,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { return false; } else { // Check that post-RA scheduling is enabled for this target. + // This may upgrade the AntiDepMode. const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>(); if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs)) return false; @@ -210,19 +230,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { DEBUG(dbgs() << "PostRAScheduler\n"); - const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); - const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); - const TargetMachine &TM = Fn.getTarget(); - const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); - ScheduleHazardRecognizer *HR = - TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins); - AntiDepBreaker *ADB = - ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? - (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : - ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ? - (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL)); - - SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA); + SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode, + CriticalPathRCs); // Loop over all of the basic blocks for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); @@ -270,9 +279,6 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.FixupKills(MBB); } - delete HR; - delete ADB; - return true; } @@ -617,13 +623,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { MinDepth = PendingQueue[i]->getDepth(); } - DEBUG(dbgs() << "\n*** Examining Available\n"; - LatencyPriorityQueue q = AvailableQueue; - while (!q.empty()) { - SUnit *su = q.pop(); - dbgs() << "Height " << su->getHeight() << ": "; - su->dump(this); - }); + DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); SUnit *FoundSUnit = 0; bool HasNoopHazards = false; @@ -631,7 +631,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { SUnit *CurSUnit = AvailableQueue.pop(); ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(CurSUnit); + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { FoundSUnit = CurSUnit; break; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index d2fbc0e28d..02e398f7ef 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "pre-RA-sched" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -33,6 +34,12 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf) ScheduleDAG::~ScheduleDAG() {} +/// getInstrDesc helper to handle SDNodes. +const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { + if (!Node->isMachineOpcode()) return NULL; + return &TII->get(Node->getMachineOpcode()); +} + /// dump - dump the schedule. void ScheduleDAG::dumpSchedule() const { for (unsigned i = 0, e = Sequence.size(); i != e; i++) { diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index d78b5d3ecb..b00e0cd099 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sched-hazard" +#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Support/Debug.h" @@ -23,29 +23,48 @@ using namespace llvm; +#ifndef NDEBUG +const char *ScoreboardHazardRecognizer::DebugType = ""; +#endif + ScoreboardHazardRecognizer:: -ScoreboardHazardRecognizer(const InstrItineraryData *LItinData) : - ScheduleHazardRecognizer(), ItinData(LItinData) { +ScoreboardHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *SchedDAG, + const char *ParentDebugType) : + ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0), + IssueCount(0) { + +#ifndef NDEBUG + DebugType = ParentDebugType; +#endif + // Determine the maximum depth of any itinerary. This determines the // depth of the scoreboard. We always make the scoreboard at least 1 // cycle deep to avoid dealing with the boundary condition. unsigned ScoreboardDepth = 1; if (ItinData && !ItinData->isEmpty()) { + IssueWidth = ItinData->IssueWidth; + for (unsigned idx = 0; ; ++idx) { if (ItinData->isEndMarker(idx)) break; const InstrStage *IS = ItinData->beginStage(idx); const InstrStage *E = ItinData->endStage(idx); + unsigned CurCycle = 0; unsigned ItinDepth = 0; - for (; IS != E; ++IS) - ItinDepth += IS->getCycles(); + for (; IS != E; ++IS) { + unsigned StageDepth = CurCycle + IS->getCycles(); + if (ItinDepth < StageDepth) ItinDepth = StageDepth; + CurCycle += IS->getNextCycles(); + } // Find the next power-of-2 >= ItinDepth while (ItinDepth > ScoreboardDepth) { ScoreboardDepth *= 2; } } + MaxLookAhead = ScoreboardDepth; } ReservedScoreboard.reset(ScoreboardDepth); @@ -56,6 +75,7 @@ ScoreboardHazardRecognizer(const InstrItineraryData *LItinData) : } void ScoreboardHazardRecognizer::Reset() { + IssueCount = 0; RequiredScoreboard.reset(); ReservedScoreboard.reset(); } @@ -76,24 +96,46 @@ void ScoreboardHazardRecognizer::Scoreboard::dump() const { } } +bool ScoreboardHazardRecognizer::atIssueLimit() const { + if (IssueWidth == 0) + return false; + + return IssueCount == IssueWidth; +} + ScheduleHazardRecognizer::HazardType -ScoreboardHazardRecognizer::getHazardType(SUnit *SU) { +ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (!ItinData || ItinData->isEmpty()) return NoHazard; - unsigned cycle = 0; + // Note that stalls will be negative for bottom-up scheduling. + int cycle = Stalls; // Use the itinerary for the underlying instruction to check for // free FU's in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + + const TargetInstrDesc *TID = DAG->getInstrDesc(SU); + if (TID == NULL) { + // Don't check hazards for non-machineinstr Nodes. + return NoHazard; + } + unsigned idx = TID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { // We must find one of the stage's units free for every cycle the // stage is occupied. FIXME it would be more accurate to find the // same unit free in all the cycles. for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < RequiredScoreboard.getDepth()) && - "Scoreboard depth exceeded!"); + int StageCycle = cycle + (int)i; + if (StageCycle < 0) + continue; + + if (StageCycle >= (int)RequiredScoreboard.getDepth()) { + assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() && + "Scoreboard depth exceeded!"); + // This stage was stalled beyond pipeline depth, so cannot conflict. + break; + } unsigned freeUnits = IS->getUnits(); switch (IS->getReservationKind()) { @@ -101,18 +143,18 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU) { assert(0 && "Invalid FU reservation"); case InstrStage::Required: // Required FUs conflict with both reserved and required ones - freeUnits &= ~ReservedScoreboard[cycle + i]; + freeUnits &= ~ReservedScoreboard[StageCycle]; // FALLTHROUGH case InstrStage::Reserved: // Reserved FUs can conflict only with required ones. - freeUnits &= ~RequiredScoreboard[cycle + i]; + freeUnits &= ~RequiredScoreboard[StageCycle]; break; } if (!freeUnits) { DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); - DEBUG(SU->getInstr()->dump()); + DEBUG(DAG->dumpNode(SU)); return Hazard; } } @@ -128,11 +170,15 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { if (!ItinData || ItinData->isEmpty()) return; + ++IssueCount; + unsigned cycle = 0; // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + const TargetInstrDesc *TID = DAG->getInstrDesc(SU); + assert(TID && "The scheduler must filter non-machineinstrs"); + unsigned idx = TID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { // We must reserve one of the stage's units for every cycle the @@ -179,11 +225,13 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { } void ScoreboardHazardRecognizer::AdvanceCycle() { + IssueCount = 0; ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); } void ScoreboardHazardRecognizer::RecedeCycle() { + IssueCount = 0; ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0; ReservedScoreboard.recede(); RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp index 11df0963c3..430283d5ef 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -63,11 +63,12 @@ private: public: ScheduleDAGList(MachineFunction &mf, - SchedulingPriorityQueue *availqueue, - ScheduleHazardRecognizer *HR) - : ScheduleDAGSDNodes(mf), - AvailableQueue(availqueue), HazardRec(HR) { - } + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { + + const TargetMachine &tm = mf.getTarget(); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } ~ScheduleDAGList() { delete HazardRec; @@ -202,7 +203,7 @@ void ScheduleDAGList::ListScheduleTopDown() { SUnit *CurSUnit = AvailableQueue->pop(); ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(CurSUnit); + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); if (HT == ScheduleHazardRecognizer::NoHazard) { FoundSUnit = CurSUnit; break; @@ -257,12 +258,8 @@ void ScheduleDAGList::ListScheduleTopDown() { // Public Constructor Functions //===----------------------------------------------------------------------===// -/// createTDListDAGScheduler - This creates a top-down list scheduler with a -/// new hazard recognizer. This scheduler takes ownership of the hazard -/// recognizer and deletes it when done. +/// createTDListDAGScheduler - This creates a top-down list scheduler. ScheduleDAGSDNodes * llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { - return new ScheduleDAGList(*IS->MF, - new LatencyPriorityQueue(), - IS->CreateTargetHazardRecognizer()); + return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue()); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8fc6eacbf6..c93ef4dd46 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -20,6 +20,7 @@ #include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" @@ -65,6 +66,11 @@ static RegisterScheduler "which tries to balance ILP and register pressure", createILPListDAGScheduler); +static cl::opt<bool> EnableSchedCycles( + "enable-sched-cycles", + cl::desc("Enable cycle-level precision during preRA scheduling"), + cl::init(false), cl::Hidden); + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -83,9 +89,21 @@ private: /// AvailableQueue - The priority queue to use for the available SUnits. SchedulingPriorityQueue *AvailableQueue; + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands becomes available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + /// CurCycle - The current scheduler state corresponds to this cycle. unsigned CurCycle; + /// MinAvailableCycle - Cycle of the soonest available instruction. + unsigned MinAvailableCycle; + /// LiveRegDefs - A set of physical registers and their definition /// that are "live". These nodes must be scheduled before any other nodes that /// modifies the registers can be scheduled. @@ -98,14 +116,22 @@ private: ScheduleDAGTopologicalSort Topo; public: - ScheduleDAGRRList(MachineFunction &mf, - bool isbottomup, bool needlatency, - SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency), - AvailableQueue(availqueue), CurCycle(0), Topo(SUnits) { - } + ScheduleDAGRRList(MachineFunction &mf, bool needlatency, + SchedulingPriorityQueue *availqueue, + CodeGenOpt::Level OptLevel) + : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), + NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), + Topo(SUnits) { + + const TargetMachine &tm = mf.getTarget(); + if (EnableSchedCycles && OptLevel != CodeGenOpt::None) + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + else + HazardRec = new ScheduleHazardRecognizer(); + } ~ScheduleDAGRRList() { + delete HazardRec; delete AvailableQueue; } @@ -139,20 +165,31 @@ public: } private: + bool isReady(SUnit *SU) { + return !EnableSchedCycles || !AvailableQueue->hasReadyFilter() || + AvailableQueue->isReady(SU); + } + void ReleasePred(SUnit *SU, const SDep *PredEdge); void ReleasePredecessors(SUnit *SU); void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); void ReleaseSuccessors(SUnit *SU); - void CapturePred(SDep *PredEdge); + void ReleasePending(); + void AdvanceToCycle(unsigned NextCycle); + void AdvancePastStalls(SUnit *SU); + void EmitNode(SUnit *SU); void ScheduleNodeBottomUp(SUnit*); + void CapturePred(SDep *PredEdge); void UnscheduleNodeBottomUp(SUnit*); - void BacktrackBottomUp(SUnit*, unsigned); + void RestoreHazardCheckerBottomUp(); + void BacktrackBottomUp(SUnit*, SUnit*); SUnit *CopyAndMoveSuccessors(SUnit*); void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, SmallVector<SUnit*, 2>&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); + SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); @@ -198,6 +235,7 @@ void ScheduleDAGRRList::Schedule() { << " '" << BB->getName() << "' **********\n"); CurCycle = 0; + MinAvailableCycle = EnableSchedCycles ? UINT_MAX : 0; NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegGens.resize(TRI->getNumRegs(), NULL); @@ -211,6 +249,8 @@ void ScheduleDAGRRList::Schedule() { AvailableQueue->initNodes(SUnits); + HazardRec->Reset(); + // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. if (isBottomUp) ListScheduleBottomUp(); @@ -249,7 +289,20 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { // to be scheduled. Ignore the special EntrySU node. if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { PredSU->isAvailable = true; - AvailableQueue->push(PredSU); + + unsigned Height = PredSU->getHeight(); + if (Height < MinAvailableCycle) + MinAvailableCycle = Height; + + if (isReady(SU)) { + AvailableQueue->push(PredSU); + } + // CapturePred and others may have left the node in the pending queue, avoid + // adding it twice. + else if (!PredSU->isPending) { + PredSU->isPending = true; + PendingQueue.push_back(PredSU); + } } } @@ -292,6 +345,127 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { } } +/// Check to see if any of the pending instructions are ready to issue. If +/// so, add them to the available queue. +void ScheduleDAGRRList::ReleasePending() { + assert(!EnableSchedCycles && "requires --enable-sched-cycles" ); + + // If the available queue is empty, it is safe to reset MinAvailableCycle. + if (AvailableQueue->empty()) + MinAvailableCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + unsigned ReadyCycle = + isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); + if (ReadyCycle < MinAvailableCycle) + MinAvailableCycle = ReadyCycle; + + if (PendingQueue[i]->isAvailable) { + if (!isReady(PendingQueue[i])) + continue; + AvailableQueue->push(PendingQueue[i]); + } + PendingQueue[i]->isPending = false; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } +} + +/// Move the scheduler state forward by the specified number of Cycles. +void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { + if (NextCycle <= CurCycle) + return; + + AvailableQueue->setCurCycle(NextCycle); + if (HazardRec->getMaxLookAhead() == 0) { + // Bypass lots of virtual calls in case of long latency. + CurCycle = NextCycle; + } + else { + for (; CurCycle != NextCycle; ++CurCycle) { + if (isBottomUp) + HazardRec->RecedeCycle(); + else + HazardRec->AdvanceCycle(); + } + } + // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the + // available Q to release pending nodes at least once before popping. + ReleasePending(); +} + +/// Move the scheduler state forward until the specified node's dependents are +/// ready and can be scheduled with no resource conflicts. +void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { + if (!EnableSchedCycles) + return; + + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + + // Bump CurCycle to account for latency. We assume the latency of other + // available instructions may be hidden by the stall (not a full pipe stall). + // This updates the hazard recognizer's cycle before reserving resources for + // this instruction. + AdvanceToCycle(ReadyCycle); + + // Calls are scheduled in their preceding cycle, so don't conflict with + // hazards from instructions after the call. EmitNode will reset the + // scoreboard state before emitting the call. + if (isBottomUp && SU->isCall) + return; + + // FIXME: For resource conflicts in very long non-pipelined stages, we + // should probably skip ahead here to avoid useless scoreboard checks. + int Stalls = 0; + while (true) { + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); + + if (HT == ScheduleHazardRecognizer::NoHazard) + break; + + ++Stalls; + } + AdvanceToCycle(CurCycle + Stalls); +} + +/// Record this SUnit in the HazardRecognizer. +/// Does not update CurCycle. +void ScheduleDAGRRList::EmitNode(SUnit *SU) { + switch (SU->getNode()->getOpcode()) { + default: + assert(SU->getNode()->isMachineOpcode() && + "This target-independent node should not be scheduled."); + break; + case ISD::MERGE_VALUES: + case ISD::TokenFactor: + case ISD::CopyToReg: + case ISD::CopyFromReg: + case ISD::EH_LABEL: + // Noops don't affect the scoreboard state. Copies are likely to be + // removed. + return; + case ISD::INLINEASM: + // For inline asm, clear the pipeline state. + HazardRec->Reset(); + return; + } + if (isBottomUp && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + + HazardRec->EmitInstruction(SU); + + if (!isBottomUp && SU->isCall) { + HazardRec->Reset(); + } +} + /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. @@ -304,8 +478,15 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); #endif - // FIXME: Handle noop hazard. + // FIXME: Do not modify node height. It may interfere with + // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the + // node it's ready cycle can aid heuristics, and after scheduling it can + // indicate the scheduled cycle. SU->setHeightToAtLeast(CurCycle); + + // Reserve resources for the scheduled intruction. + EmitNode(SU); + Sequence.push_back(SU); AvailableQueue->ScheduledNode(SU); @@ -327,6 +508,15 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { } SU->isScheduled = true; + + // Conditions under which the scheduler should eagerly advance the cycle: + // (1) No available instructions + // (2) All pipelines full, so available instructions must have hazards. + // + // If SchedCycles is disabled, count each inst as one cycle. + if (!EnableSchedCycles || + AvailableQueue->empty() || HazardRec->atIssueLimit()) + AdvanceToCycle(CurCycle + 1); } /// CapturePred - This does the opposite of ReleasePred. Since SU is being @@ -377,31 +567,69 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { LiveRegGens[I->getReg()] = I->getSUnit(); } } + if (SU->getHeight() < MinAvailableCycle) + MinAvailableCycle = SU->getHeight(); SU->setHeightDirty(); SU->isScheduled = false; SU->isAvailable = true; - AvailableQueue->push(SU); + if (EnableSchedCycles && AvailableQueue->hasReadyFilter()) { + // Don't make available until backtracking is complete. + SU->isPending = true; + PendingQueue.push_back(SU); + } + else { + AvailableQueue->push(SU); + } AvailableQueue->UnscheduledNode(SU); } +/// After backtracking, the hazard checker needs to be restored to a state +/// corresponding the the current cycle. +void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { + HazardRec->Reset(); + + unsigned LookAhead = std::min((unsigned)Sequence.size(), + HazardRec->getMaxLookAhead()); + if (LookAhead == 0) + return; + + std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead); + unsigned HazardCycle = (*I)->getHeight(); + for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) { + SUnit *SU = *I; + for (; SU->getHeight() > HazardCycle; ++HazardCycle) { + HazardRec->RecedeCycle(); + } + EmitNode(SU); + } +} + /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in /// BTCycle in order to schedule a specific node. -void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle) { - SUnit *OldSU = NULL; - while (CurCycle > BtCycle) { - OldSU = Sequence.back(); +void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { + SUnit *OldSU = Sequence.back(); + while (true) { Sequence.pop_back(); if (SU->isSucc(OldSU)) // Don't try to remove SU from AvailableQueue. SU->isAvailable = false; + // FIXME: use ready cycle instead of height + CurCycle = OldSU->getHeight(); UnscheduleNodeBottomUp(OldSU); - --CurCycle; AvailableQueue->setCurCycle(CurCycle); + if (OldSU == BtSU) + break; + OldSU = Sequence.back(); } assert(!SU->isSucc(OldSU) && "Something is wrong!"); + RestoreHazardCheckerBottomUp(); + + if (EnableSchedCycles) + ReleasePending(); + ++NumBacktracks; } @@ -741,7 +969,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) { /// Return a node that can be scheduled in this cycle. Requirements: /// (1) Ready: latency has been satisfied -/// (2) No Hazards: resources are available (TBD) +/// (2) No Hazards: resources are available /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { SmallVector<SUnit*, 4> Interferences; @@ -777,24 +1005,26 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // Try unscheduling up to the point where it's safe to schedule // this node. - unsigned LiveCycle = CurCycle; + SUnit *BtSU = NULL; + unsigned LiveCycle = UINT_MAX; for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { unsigned Reg = LRegs[j]; - unsigned LCycle = LiveRegGens[Reg]->getHeight(); - LiveCycle = std::min(LiveCycle, LCycle); + if (LiveRegGens[Reg]->getHeight() < LiveCycle) { + BtSU = LiveRegGens[Reg]; + LiveCycle = BtSU->getHeight(); + } } - SUnit *OldSU = Sequence[LiveCycle]; - if (!WillCreateCycle(TrySU, OldSU)) { - BacktrackBottomUp(TrySU, LiveCycle); + if (!WillCreateCycle(TrySU, BtSU)) { + BacktrackBottomUp(TrySU, BtSU); // Force the current node to be scheduled before the node that // requires the physical reg dep. - if (OldSU->isAvailable) { - OldSU->isAvailable = false; - if (!OldSU->isPending) - AvailableQueue->remove(OldSU); + if (BtSU->isAvailable) { + BtSU->isAvailable = false; + if (!BtSU->isPending) + AvailableQueue->remove(BtSU); } - AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1, + AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, /*isArtificial=*/true)); @@ -891,15 +1121,22 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // priority. If it is not ready put it back. Schedule the node. Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { + DEBUG(dbgs() << "\n*** Examining Available\n"; + AvailableQueue->dump(this)); + // Pick the best node to schedule taking all constraints into // consideration. SUnit *SU = PickNodeToScheduleBottomUp(); - if (SU) - ScheduleNodeBottomUp(SU); + AdvancePastStalls(SU); - ++CurCycle; - AvailableQueue->setCurCycle(CurCycle); + ScheduleNodeBottomUp(SU); + + while (AvailableQueue->empty() && !PendingQueue.empty()) { + // Advance the cycle to free resources. Skip ahead to the next ready SU. + assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); + AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); + } } // Reverse the order if it is bottom up. @@ -967,7 +1204,6 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) { /// ListScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. void ScheduleDAGRRList::ListScheduleTopDown() { - unsigned CurCycle = 0; AvailableQueue->setCurCycle(CurCycle); // Release any successors of the special Entry node. @@ -1011,9 +1247,18 @@ namespace { template<class SF> class RegReductionPriorityQueue; + struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { + bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } + }; + /// bu_ls_rr_sort - Priority function for bottom up register pressure // reduction scheduler. - struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + struct bu_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ; bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {} bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} @@ -1023,8 +1268,13 @@ namespace { // td_ls_rr_sort - Priority function for top down register pressure reduction // scheduler. - struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { - RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; + struct td_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = false, + HasReadyFilter = false + }; + + RegReductionPriorityQueue<td_ls_rr_sort> *SPQ; td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {} td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} @@ -1032,7 +1282,12 @@ namespace { }; // src_ls_rr_sort - Priority function for source order scheduler. - struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + struct src_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + RegReductionPriorityQueue<src_ls_rr_sort> *SPQ; src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq) : SPQ(spq) {} @@ -1043,7 +1298,12 @@ namespace { }; // hybrid_ls_rr_sort - Priority function for hybrid scheduler. - struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + struct hybrid_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ; hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq) : SPQ(spq) {} @@ -1055,13 +1315,20 @@ namespace { // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) // scheduler. - struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> { + struct ilp_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = true + }; + RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ; ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq) : SPQ(spq) {} ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + bool isReady(SUnit *SU, unsigned CurCycle) const; + bool operator()(const SUnit* left, const SUnit* right) const; }; } // end anonymous namespace @@ -1098,6 +1365,19 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { namespace { template<class SF> class RegReductionPriorityQueue : public SchedulingPriorityQueue { + static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; + } + std::vector<SUnit*> Queue; SF Picker; unsigned CurQueueId; @@ -1130,7 +1410,8 @@ namespace { const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) - : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp), + : SchedulingPriorityQueue(SF::HasReadyFilter), Picker(this), + CurQueueId(0), TracksRegPressure(tracksrp), MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); @@ -1144,6 +1425,8 @@ namespace { } } + bool isBottomUp() const { return SF::IsBottomUp; } + void initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; // Add pseudo dependency edges for two-address nodes. @@ -1205,6 +1488,10 @@ namespace { bool empty() const { return Queue.empty(); } + bool isReady(SUnit *U) const { + return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); + } + void push(SUnit *U) { assert(!U->NodeQueueId && "Node in the queue already"); U->NodeQueueId = ++CurQueueId; @@ -1212,16 +1499,9 @@ namespace { } SUnit *pop() { - if (empty()) return NULL; - std::vector<SUnit *>::iterator Best = Queue.begin(); - for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()), - E = Queue.end(); I != E; ++I) - if (Picker(*Best, *I)) - Best = I; - SUnit *V = *Best; - if (Best != prior(Queue.end())) - std::swap(*Best, Queue.back()); - Queue.pop_back(); + if (Queue.empty()) return NULL; + + SUnit *V = popFromQueue(Queue, Picker); V->NodeQueueId = 0; return V; } @@ -1475,6 +1755,20 @@ namespace { } } + void dump(ScheduleDAG *DAG) const { + // Emulate pop() without clobbering NodeQueueIds. + std::vector<SUnit*> DumpQueue = Queue; + SF DumpPicker = Picker; + while (!DumpQueue.empty()) { + SUnit *SU = popFromQueue(DumpQueue, DumpPicker); + if (isBottomUp()) + dbgs() << "Height " << SU->getHeight() << ": "; + else + dbgs() << "Depth " << SU->getDepth() << ": "; + SU->dump(DAG); + } + } + protected: bool canClobber(const SUnit *SU, const SUnit *Op); void AddPseudoTwoAddrDeps(); @@ -1605,6 +1899,7 @@ static bool BURRSort(const SUnit *left, const SUnit *right, if (LScratch != RScratch) return LScratch > RScratch; + // Note: with a bottom-up ready filter, the height check may be redundant. if (left->getHeight() != right->getHeight()) return left->getHeight() > right->getHeight(); @@ -1696,6 +1991,12 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ return BURRSort(left, right, SPQ); } +// Schedule as many instructions in each cycle as possible. So don't make an +// instruction available unless it is ready in the current cycle. +bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + return SU->getHeight() <= CurCycle; +} + bool ilp_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { if (left->isCall || right->isCall) @@ -2051,46 +2352,50 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { //===----------------------------------------------------------------------===// llvm::ScheduleDAGSDNodes * -llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } llvm::ScheduleDAGSDNodes * -llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } llvm::ScheduleDAGSDNodes * -llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } llvm::ScheduleDAGSDNodes * -llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); @@ -2098,13 +2403,15 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } llvm::ScheduleDAGSDNodes * -llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::createILPListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); @@ -2112,7 +2419,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { ILPBURRPriorityQueue *PQ = new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); PQ->setScheduleDAG(SD); return SD; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index b7e90be8b3..23ed19afbb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -1201,10 +1201,6 @@ ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { return Ctor(this, OptLevel); } -ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() { - return new ScheduleHazardRecognizer(); -} - //===----------------------------------------------------------------------===// // Helper functions used by the generated instruction selector. //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index baad14c5c7..28b973b16e 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -414,8 +414,18 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, return false; } +// Default implementation of CreateTargetPreRAHazardRecognizer. +ScheduleHazardRecognizer *TargetInstrInfoImpl:: +CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const { + // Dummy hazard recognizer allows all instructions to issue. + return new ScheduleHazardRecognizer(); +} + // Default implementation of CreateTargetPostRAHazardRecognizer. ScheduleHazardRecognizer *TargetInstrInfoImpl:: -CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const { - return (ScheduleHazardRecognizer *)new ScoreboardHazardRecognizer(II); +CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + return (ScheduleHazardRecognizer *) + new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 8edc0e7f12..fd13485d1f 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -41,6 +41,13 @@ static cl::opt<bool> EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); +// Other targets already have a hazard recognizer enabled by default, so this +// flag currently only affects ARM. It will be generalized when it becomes a +// disabled flag. +static cl::opt<bool> EnableHazardRecognizer( + "enable-sched-hazard", cl::Hidden, + cl::desc("Enable hazard detection during preRA scheduling"), + cl::init(false)); /// ARM_MLxEntry - Record information about MLA / MLS instructions. struct ARM_MLxEntry { @@ -85,12 +92,25 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) } } +// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl +// currently defaults to no prepass hazard recognizer. ScheduleHazardRecognizer *ARMBaseInstrInfo:: -CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const { +CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const { + if (EnableHazardRecognizer) { + const InstrItineraryData *II = TM->getInstrItineraryData(); + return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); + } + return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); +} + +ScheduleHazardRecognizer *ARMBaseInstrInfo:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { if (Subtarget.isThumb2() || Subtarget.hasVFP2()) return (ScheduleHazardRecognizer *) - new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget); - return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II); + new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); + return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG); } MachineInstr * diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index ca8b9a0240..0ea8a9600b 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -211,7 +211,12 @@ public: const ARMSubtarget &getSubtarget() const { return Subtarget; } ScheduleHazardRecognizer * - CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const; + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + + ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const; // Branch analysis. virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index b8d385b6e3..683a7cc169 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -34,7 +34,9 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, } ScheduleHazardRecognizer::HazardType -ARMHazardRecognizer::getHazardType(SUnit *SU) { +ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead"); + MachineInstr *MI = SU->getInstr(); if (!MI->isDebugValue()) { @@ -61,19 +63,19 @@ ARMHazardRecognizer::getHazardType(SUnit *SU) { (TII.canCauseFpMLxStall(MI->getOpcode()) || hasRAWHazard(DefMI, MI, TRI))) { // Try to schedule another instruction for the next 4 cycles. - if (Stalls == 0) - Stalls = 4; + if (FpMLxStalls == 0) + FpMLxStalls = 4; return Hazard; } } } - return ScoreboardHazardRecognizer::getHazardType(SU); + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); } void ARMHazardRecognizer::Reset() { LastMI = 0; - Stalls = 0; + FpMLxStalls = 0; ITBlockSize = 0; ScoreboardHazardRecognizer::Reset(); } @@ -100,14 +102,14 @@ void ARMHazardRecognizer::EmitInstruction(SUnit *SU) { if (!MI->isDebugValue()) { LastMI = MI; - Stalls = 0; + FpMLxStalls = 0; } ScoreboardHazardRecognizer::EmitInstruction(SU); } void ARMHazardRecognizer::AdvanceCycle() { - if (Stalls && --Stalls == 0) + if (FpMLxStalls && --FpMLxStalls == 0) // Stalled for 4 cycles but still can't schedule any other instructions. LastMI = 0; ScoreboardHazardRecognizer::AdvanceCycle(); diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h index 9473bc5207..2bc218d856 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.h +++ b/lib/Target/ARM/ARMHazardRecognizer.h @@ -29,7 +29,7 @@ class ARMHazardRecognizer : public ScoreboardHazardRecognizer { const ARMSubtarget &STI; MachineInstr *LastMI; - unsigned Stalls; + unsigned FpMLxStalls; unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled. MachineInstr *ITBlockMIs[4]; @@ -37,11 +37,12 @@ public: ARMHazardRecognizer(const InstrItineraryData *ItinData, const ARMBaseInstrInfo &tii, const ARMBaseRegisterInfo &tri, - const ARMSubtarget &sti) : - ScoreboardHazardRecognizer(ItinData), TII(tii), TRI(tri), STI(sti), - LastMI(0), ITBlockSize(0) {} + const ARMSubtarget &sti, + const ScheduleDAG *DAG) : + ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii), + TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {} - virtual HazardType getHazardType(SUnit *SU); + virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void Reset(); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index b0057bd209..6290e67675 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -140,6 +140,9 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, FSWithArch = FS; CPUString = ParseSubtargetFeatures(FSWithArch, CPUString); + // After parsing Itineraries, set ItinData.IssueWidth. + computeIssueWidth(); + // Thumb2 implies at least V6T2. if (ARMArchVersion >= V6T2) ThumbMode = Thumb2; @@ -224,6 +227,21 @@ unsigned ARMSubtarget::getMispredictionPenalty() const { return 10; } +void ARMSubtarget::computeIssueWidth() { + unsigned allStage1Units = 0; + for (const InstrItinerary *itin = InstrItins.Itineraries; + itin->FirstStage != ~0U; ++itin) { + const InstrStage *IS = InstrItins.Stages + itin->FirstStage; + allStage1Units |= IS->getUnits(); + } + InstrItins.IssueWidth = 0; + while (allStage1Units) { + ++InstrItins.IssueWidth; + // clear the lowest bit + allStage1Units ^= allStage1Units & ~(allStage1Units - 1); + } +} + bool ARMSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtarget::AntiDepBreakMode& Mode, diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index fdd394dce3..8d911d010a 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -156,6 +156,8 @@ protected: std::string ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); + void computeIssueWidth(); + bool hasV4TOps() const { return ARMArchVersion >= V4T; } bool hasV5TOps() const { return ARMArchVersion >= V5T; } bool hasV5TEOps() const { return ARMArchVersion >= V5TE; } diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/lib/Target/CellSPU/SPUHazardRecognizers.cpp index 9dbab1da99..403d7ef1fd 100644 --- a/lib/Target/CellSPU/SPUHazardRecognizers.cpp +++ b/lib/Target/CellSPU/SPUHazardRecognizers.cpp @@ -41,12 +41,14 @@ SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) : /// /// \return NoHazard ScheduleHazardRecognizer::HazardType -SPUHazardRecognizer::getHazardType(SUnit *SU) +SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { // Initial thoughts on how to do this, but this code cannot work unless the // function's prolog and epilog code are also being scheduled so that we can // accurately determine which pipeline is being scheduled. #if 0 + assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead"); + const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); ScheduleHazardRecognizer::HazardType retval = NoHazard; bool mustBeOdd = false; diff --git a/lib/Target/CellSPU/SPUHazardRecognizers.h b/lib/Target/CellSPU/SPUHazardRecognizers.h index 3469292d57..675632cc7f 100644 --- a/lib/Target/CellSPU/SPUHazardRecognizers.h +++ b/lib/Target/CellSPU/SPUHazardRecognizers.h @@ -30,7 +30,7 @@ private: public: SPUHazardRecognizer(const TargetInstrInfo &TII); - virtual HazardType getHazardType(SUnit *SU); + virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); virtual void EmitNoop(); diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index a3ad3436fa..24c23dd697 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -301,14 +301,6 @@ namespace { return "Cell SPU DAG->DAG Pattern Instruction Selection"; } - /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for - /// this target when scheduling the DAG. - virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() { - const TargetInstrInfo *II = TM.getInstrInfo(); - assert(II && "No InstrInfo?"); - return new SPUHazardRecognizer(*II); - } - private: SDValue getRC( MVT ); diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 26d6b4f25e..e5048e6d4e 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -16,6 +16,7 @@ #include "SPUInstrBuilder.h" #include "SPUTargetMachine.h" #include "SPUGenInstrInfo.inc" +#include "SPUHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -54,6 +55,16 @@ SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm) RI(*TM.getSubtargetImpl(), *this) { /* NOP */ } +/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for +/// this target when scheduling the DAG. +ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer( + const TargetMachine *TM, + const ScheduleDAG *DAG) const { + const TargetInstrInfo *TII = TM->getInstrInfo(); + assert(TII && "No InstrInfo?"); + return new SPUHazardRecognizer(*TII); +} + unsigned SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index 191e55d0ca..e5e9148141 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -32,6 +32,10 @@ namespace llvm { /// virtual const SPURegisterInfo &getRegisterInfo() const { return RI; } + ScheduleHazardRecognizer * + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 301e89cdcb..0de5844d1c 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -122,7 +122,9 @@ isLoadOfStoredAddress(unsigned LoadSize, SDValue Ptr1, SDValue Ptr2) const { /// instructions that wouldn't terminate the dispatch group that would cause a /// pipeline flush. ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970:: -getHazardType(SUnit *SU) { +getHazardType(SUnit *SU, int Stalls) { + assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead"); + const SDNode *Node = SU->getNode()->getGluedMachineNode(); bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index ca95f7be9c..2f81f0f7c7 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -48,7 +48,7 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { public: PPCHazardRecognizer970(const TargetInstrInfo &TII); - virtual HazardType getHazardType(SUnit *SU); + virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); virtual void AdvanceCycle(); diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 0d624d08cd..664bfe7a9c 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -16,7 +16,6 @@ #include "PPC.h" #include "PPCPredicates.h" #include "PPCTargetMachine.h" -#include "PPCHazardRecognizers.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -155,16 +154,6 @@ namespace { return "PowerPC DAG->DAG Pattern Instruction Selection"; } - /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for - /// this target when scheduling the DAG. - virtual ScheduleHazardRecognizer *CreateTargetHazardRecognizer() { - // Should use subtarget info to pick the right hazard recognizer. For - // now, always return a PPC970 recognizer. - const TargetInstrInfo *II = TM.getInstrInfo(); - assert(II && "No InstrInfo?"); - return new PPCHazardRecognizer970(*II); - } - // Include the pieces autogenerated from the target description. #include "PPCGenDAGISel.inc" diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 8093789127..53b049135e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -17,6 +17,7 @@ #include "PPCPredicates.h" #include "PPCGenInstrInfo.inc" #include "PPCTargetMachine.h" +#include "PPCHazardRecognizers.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -39,6 +40,18 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm), RI(*TM.getSubtargetImpl(), *this) {} +/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for +/// this target when scheduling the DAG. +ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( + const TargetMachine *TM, + const ScheduleDAG *DAG) const { + // Should use subtarget info to pick the right hazard recognizer. For + // now, always return a PPC970 recognizer. + const TargetInstrInfo *TII = TM->getInstrInfo(); + assert(TII && "No InstrInfo?"); + return new PPCHazardRecognizer970(*TII); +} + unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { switch (MI->getOpcode()) { diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 4083577997..b5249ae037 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -82,6 +82,10 @@ public: /// virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; } + ScheduleHazardRecognizer * + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; unsigned isStoreToStackSlot(const MachineInstr *MI, |